From e0b803349d1c3ab14ab3e03116d8508004679d05 Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Thu, 30 Jul 2020 19:01:34 -0400
Subject: [PATCH 01/12] Move general language extensions to the ONEAPI
 namespace

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 clang/lib/Sema/SemaSYCL.cpp                   |  2 +-
 clang/test/CodeGenSYCL/Inputs/sycl.hpp        |  4 +-
 .../CodeGenSYCL/int_header_spec_const.cpp     | 20 ++--
 clang/test/SemaSYCL/Inputs/sycl.hpp           |  2 +-
 .../SemaSYCL/spec_const_and_accesor_crash.cpp |  4 +-
 sycl/include/CL/sycl.hpp                      | 12 +--
 .../CL/sycl/{intel => ONEAPI}/atomic.hpp      | 10 +-
 .../{intel => ONEAPI}/atomic_accessor.hpp     |  8 +-
 .../sycl/{intel => ONEAPI}/atomic_enums.hpp   | 10 +-
 .../sycl/{intel => ONEAPI}/atomic_fence.hpp   |  8 +-
 .../CL/sycl/{intel => ONEAPI}/atomic_ref.hpp  | 20 ++--
 .../CL/sycl/{intel => ONEAPI}/builtins.hpp    |  6 +-
 .../{intel => ONEAPI}/function_pointer.hpp    |  4 +-
 .../CL/sycl/{intel => ONEAPI}/functional.hpp  | 22 ++---
 .../{intel => ONEAPI}/group_algorithm.hpp     | 96 +++++++++----------
 .../CL/sycl/{intel => ONEAPI}/reduction.hpp   | 56 +++++------
 .../spec_constant.hpp                         |  6 +-
 .../CL/sycl/{intel => ONEAPI}/sub_group.hpp   | 20 ++--
 sycl/include/CL/sycl/detail/spirv.hpp         | 94 +++++++++---------
 sycl/include/CL/sycl/detail/type_traits.hpp   |  6 +-
 sycl/include/CL/sycl/handler.hpp              | 20 ++--
 sycl/include/CL/sycl/nd_item.hpp              |  4 +-
 sycl/include/CL/sycl/program.hpp              |  8 +-
 sycl/source/detail/program_impl.cpp           |  2 +-
 .../program_manager/program_manager.cpp       |  4 +-
 sycl/source/detail/reduction.cpp              |  6 +-
 sycl/source/function_pointer.cpp              |  6 +-
 sycl/test/abi/sycl_symbols_linux.dump         |  4 +-
 sycl/test/atomic_ref/accessor.cpp             | 22 ++---
 sycl/test/atomic_ref/add.cpp                  | 36 ++++---
 sycl/test/atomic_ref/compare_exchange.cpp     |  6 +-
 sycl/test/atomic_ref/exchange.cpp             |  6 +-
 sycl/test/atomic_ref/load.cpp                 |  6 +-
 sycl/test/atomic_ref/max.cpp                  |  6 +-
 sycl/test/atomic_ref/min.cpp                  |  6 +-
 sycl/test/atomic_ref/store.cpp                |  6 +-
 sycl/test/atomic_ref/sub.cpp                  | 36 ++++---
 sycl/test/built-ins/printf.cpp                | 22 ++---
 .../function-pointers/fp-as-kernel-arg.cpp    |  4 +-
 .../pass-fp-through-buffer.cpp                |  8 +-
 sycl/test/group-algorithm/all_of.cpp          |  2 +-
 sycl/test/group-algorithm/any_of.cpp          |  2 +-
 sycl/test/group-algorithm/broadcast.cpp       |  2 +-
 sycl/test/group-algorithm/exclusive_scan.cpp  |  2 +-
 sycl/test/group-algorithm/inclusive_scan.cpp  |  2 +-
 sycl/test/group-algorithm/leader.cpp          |  2 +-
 sycl/test/group-algorithm/none_of.cpp         |  2 +-
 sycl/test/group-algorithm/reduce.cpp          |  2 +-
 sycl/test/linear_id/linear-sub_group.cpp      |  2 +-
 sycl/test/reduction/reduction_ctor.cpp        | 22 ++---
 .../reduction/reduction_nd_conditional.cpp    | 10 +-
 sycl/test/reduction/reduction_nd_ext_type.hpp | 14 +--
 sycl/test/reduction/reduction_nd_lambda.cpp   |  4 +-
 sycl/test/reduction/reduction_nd_s0_dw.cpp    | 40 ++++----
 sycl/test/reduction/reduction_nd_s0_rw.cpp    | 40 ++++----
 sycl/test/reduction/reduction_nd_s1_dw.cpp    | 40 ++++----
 sycl/test/reduction/reduction_nd_s1_rw.cpp    | 40 ++++----
 sycl/test/reduction/reduction_placeholder.cpp | 16 ++--
 sycl/test/reduction/reduction_transparent.cpp | 12 +--
 sycl/test/reduction/reduction_usm.cpp         | 16 ++--
 .../regression/sub-group-store-const-ref.cpp  |  2 +-
 sycl/test/spec_const/spec_const_hw.cpp        |  6 +-
 sycl/test/spec_const/spec_const_neg.cpp       |  6 +-
 sycl/test/spec_const/spec_const_redefine.cpp  |  4 +-
 sycl/test/spec_const/spec_const_types.cpp     | 24 ++---
 sycl/test/sub_group/barrier.cpp               |  2 +-
 sycl/test/sub_group/broadcast.hpp             |  2 +-
 sycl/test/sub_group/common.cpp                |  2 +-
 sycl/test/sub_group/common_ocl.cpp            |  2 +-
 sycl/test/sub_group/generic-shuffle.cpp       |  4 +-
 sycl/test/sub_group/generic_reduce.cpp        |  6 +-
 sycl/test/sub_group/load_store.cpp            |  4 +-
 sycl/test/sub_group/reduce.hpp                | 26 ++---
 sycl/test/sub_group/scan.hpp                  | 34 +++----
 sycl/test/sub_group/shuffle.hpp               |  4 +-
 sycl/test/sub_group/vote.cpp                  |  2 +-
 76 files changed, 531 insertions(+), 499 deletions(-)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/atomic.hpp (57%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/atomic_accessor.hpp (97%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/atomic_enums.hpp (93%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/atomic_fence.hpp (87%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/atomic_ref.hpp (97%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/builtins.hpp (96%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/function_pointer.hpp (98%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/functional.hpp (85%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/group_algorithm.hpp (93%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/reduction.hpp (96%)
 rename sycl/include/CL/sycl/{experimental => ONEAPI}/spec_constant.hpp (93%)
 rename sycl/include/CL/sycl/{intel => ONEAPI}/sub_group.hpp (97%)

diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index ad5a3768846c7..fa5f91e1b76e0 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -2816,7 +2816,7 @@ bool Util::isSyclSpecConstantType(const QualType &Ty) {
   std::array<DeclContextDesc, 4> Scopes = {
       Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"},
       Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"},
-      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "experimental"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "ONEAPI"},
       Util::DeclContextDesc{Decl::Kind::ClassTemplateSpecialization, Name}};
   return matchQualifiedTypeName(Ty, Scopes);
 }
diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp
index 3184c58edcbfc..1a07d8a1a4a71 100644
--- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp
+++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp
@@ -242,7 +242,7 @@ struct get_kernel_name_t<auto_name, Type> {
   using name = Type;
 };
 
-namespace experimental {
+namespace ONEAPI {
 template <typename T, typename ID = T>
 class spec_constant {
 public:
@@ -256,7 +256,7 @@ class spec_constant {
     return get();
   }
 };
-} // namespace experimental
+} // namespace ONEAPI
 
 #define ATTR_SYCL_KERNEL __attribute__((sycl_kernel))
 template <typename KernelName = auto_name, typename KernelType>
diff --git a/clang/test/CodeGenSYCL/int_header_spec_const.cpp b/clang/test/CodeGenSYCL/int_header_spec_const.cpp
index e6743c4ea2e91..d41798ceb0475 100644
--- a/clang/test/CodeGenSYCL/int_header_spec_const.cpp
+++ b/clang/test/CodeGenSYCL/int_header_spec_const.cpp
@@ -20,18 +20,18 @@ class MyDoubleConst;
 
 int main() {
   // Create specialization constants.
-  cl::sycl::experimental::spec_constant<bool, MyBoolConst> i1(false);
-  cl::sycl::experimental::spec_constant<char, MyInt8Const> i8(0);
-  cl::sycl::experimental::spec_constant<unsigned char, MyUInt8Const> ui8(0);
-  cl::sycl::experimental::spec_constant<short, MyInt16Const> i16(0);
-  cl::sycl::experimental::spec_constant<unsigned short, MyUInt16Const> ui16(0);
-  cl::sycl::experimental::spec_constant<int, MyInt32Const> i32(0);
+  cl::sycl::ONEAPI::spec_constant<bool, MyBoolConst> i1(false);
+  cl::sycl::ONEAPI::spec_constant<char, MyInt8Const> i8(0);
+  cl::sycl::ONEAPI::spec_constant<unsigned char, MyUInt8Const> ui8(0);
+  cl::sycl::ONEAPI::spec_constant<short, MyInt16Const> i16(0);
+  cl::sycl::ONEAPI::spec_constant<unsigned short, MyUInt16Const> ui16(0);
+  cl::sycl::ONEAPI::spec_constant<int, MyInt32Const> i32(0);
   // Constant used twice, but there must be single entry in the int header,
   // otherwise compilation error would be issued.
-  cl::sycl::experimental::spec_constant<int, MyInt32Const> i32_1(0);
-  cl::sycl::experimental::spec_constant<unsigned int, MyUInt32Const> ui32(0);
-  cl::sycl::experimental::spec_constant<float, MyFloatConst> f32(0);
-  cl::sycl::experimental::spec_constant<double, MyDoubleConst> f64(0);
+  cl::sycl::ONEAPI::spec_constant<int, MyInt32Const> i32_1(0);
+  cl::sycl::ONEAPI::spec_constant<unsigned int, MyUInt32Const> ui32(0);
+  cl::sycl::ONEAPI::spec_constant<float, MyFloatConst> f32(0);
+  cl::sycl::ONEAPI::spec_constant<double, MyDoubleConst> f64(0);
 
   double val;
   double *ptr = &val; // to avoid "unused" warnings
diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp
index 9e3efc6321096..4b63b55e26f4d 100644
--- a/clang/test/SemaSYCL/Inputs/sycl.hpp
+++ b/clang/test/SemaSYCL/Inputs/sycl.hpp
@@ -208,7 +208,7 @@ class handler {
   }
 };
 
-namespace experimental {
+namespace ONEAPI {
 
 template <typename T, typename ID = T>
 class spec_constant {};
diff --git a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
index cbfba6e4be32d..4c147023c5d46 100644
--- a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
+++ b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
@@ -10,9 +10,9 @@ __attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
 }
 
 int main() {
-  cl::sycl::experimental::spec_constant<char, class MyInt32Const> spec_const;
+  cl::sycl::ONEAPI::spec_constant<char, class MyInt32Const> spec_const;
   cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> accessor;
-  // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::experimental::spec_constant<char, class MyInt32Const>'
+  // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::ONEAPI::spec_constant<char, class MyInt32Const>'
   // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write>'
   kernel<class MyKernel>([spec_const, accessor]() {});
   return 0;
diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp
index 4fd3c55b0952c..aca073b79db6d 100644
--- a/sycl/include/CL/sycl.hpp
+++ b/sycl/include/CL/sycl.hpp
@@ -22,17 +22,17 @@
 #include <CL/sycl/handler.hpp>
 #include <CL/sycl/id.hpp>
 #include <CL/sycl/image.hpp>
-#include <CL/sycl/intel/atomic.hpp>
-#include <CL/sycl/intel/builtins.hpp>
-#include <CL/sycl/intel/function_pointer.hpp>
-#include <CL/sycl/intel/group_algorithm.hpp>
-#include <CL/sycl/intel/reduction.hpp>
-#include <CL/sycl/intel/sub_group.hpp>
 #include <CL/sycl/item.hpp>
 #include <CL/sycl/kernel.hpp>
 #include <CL/sycl/multi_ptr.hpp>
 #include <CL/sycl/nd_item.hpp>
 #include <CL/sycl/nd_range.hpp>
+#include <CL/sycl/ONEAPI/atomic.hpp>
+#include <CL/sycl/ONEAPI/builtins.hpp>
+#include <CL/sycl/ONEAPI/function_pointer.hpp>
+#include <CL/sycl/ONEAPI/group_algorithm.hpp>
+#include <CL/sycl/ONEAPI/reduction.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/pipes.hpp>
 #include <CL/sycl/platform.hpp>
 #include <CL/sycl/pointers.hpp>
diff --git a/sycl/include/CL/sycl/intel/atomic.hpp b/sycl/include/CL/sycl/ONEAPI/atomic.hpp
similarity index 57%
rename from sycl/include/CL/sycl/intel/atomic.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic.hpp
index ecb32b76a6976..a7b07d80d2fd3 100644
--- a/sycl/include/CL/sycl/intel/atomic.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic.hpp
@@ -1,4 +1,4 @@
-//==---------------- atomic.hpp - SYCL_INTEL_extended_atomics --------------==//
+//==--------------- atomic.hpp - SYCL_ONEAPI_extended_atomics --------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
 
 #pragma once
 
-#include <CL/sycl/intel/atomic_accessor.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
-#include <CL/sycl/intel/atomic_fence.hpp>
-#include <CL/sycl/intel/atomic_ref.hpp>
+#include <CL/sycl/ONEAPI/atomic_accessor.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
+#include <CL/sycl/ONEAPI/atomic_fence.hpp>
+#include <CL/sycl/ONEAPI/atomic_ref.hpp>
diff --git a/sycl/include/CL/sycl/intel/atomic_accessor.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
similarity index 97%
rename from sycl/include/CL/sycl/intel/atomic_accessor.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
index 524e2d4a3ab80..60042100c859d 100644
--- a/sycl/include/CL/sycl/intel/atomic_accessor.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
@@ -9,12 +9,12 @@
 #pragma once
 
 #include <CL/sycl/access/access.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
-#include <CL/sycl/intel/atomic_ref.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
+#include <CL/sycl/ONEAPI/atomic_ref.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 
 #if __cplusplus > 201402L
 
@@ -123,6 +123,6 @@ atomic_accessor(buffer<DataT, Dimensions, AllocatorT>, handler,
 
 #endif
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/atomic_enums.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp
similarity index 93%
rename from sycl/include/CL/sycl/intel/atomic_enums.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp
index a85c9902cd524..ad1d94ca2d815 100644
--- a/sycl/include/CL/sycl/intel/atomic_enums.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp
@@ -1,4 +1,4 @@
-//==---------------- atomic_enums.hpp - SYCL_INTEL_extended_atomics enums --==//
+//==--------------- atomic_enums.hpp - SYCL_ONEAPI_extended_atomics enums --==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -20,7 +20,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 
 enum class memory_order : int {
   relaxed,
@@ -63,7 +63,7 @@ namespace detail {
 // Nested ternary conditions in else branch required for C++11
 #if __cplusplus >= 201402L
 static inline constexpr std::memory_order
-getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
+getStdMemoryOrder(::cl::sycl::ONEAPI::memory_order order) {
   switch (order) {
   case memory_order::relaxed:
     return std::memory_order_relaxed;
@@ -81,7 +81,7 @@ getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
 }
 #else
 static inline constexpr std::memory_order
-getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
+getStdMemoryOrder(::cl::sycl::ONEAPI::memory_order order) {
   return (order == memory_order::relaxed)
              ? std::memory_order_relaxed
              : (order == memory_order::__consume_unsupported)
@@ -98,6 +98,6 @@ getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
 } // namespace detail
 #endif // __SYCL_DEVICE_ONLY__
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/atomic_fence.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
similarity index 87%
rename from sycl/include/CL/sycl/intel/atomic_fence.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
index aba95c060b878..4fd306192a4df 100644
--- a/sycl/include/CL/sycl/intel/atomic_fence.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
@@ -1,4 +1,4 @@
-//==----- atomic_fence.hpp - SYCL_INTEL_extended_atomics atomic_fence ------==//
+//==---- atomic_fence.hpp - SYCL_ONEAPI_extended_atomics atomic_fence ------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,7 +10,7 @@
 
 #include <CL/__spirv/spirv_ops.hpp>
 #include <CL/sycl/detail/spirv.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <atomic>
@@ -18,7 +18,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 namespace detail {
 using namespace cl::sycl::detail;
 }
@@ -35,6 +35,6 @@ static inline void atomic_fence(memory_order order, memory_scope scope) {
 #endif
 }
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/atomic_ref.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
similarity index 97%
rename from sycl/include/CL/sycl/intel/atomic_ref.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
index 1616727f919b8..05909cc5d4fda 100644
--- a/sycl/include/CL/sycl/intel/atomic_ref.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
@@ -1,4 +1,4 @@
-//==----- atomic_ref.hpp - SYCL_INTEL_extended_atomics atomic_ref ----------==//
+//==----- atomic_ref.hpp - SYCL_ONEAPI_extended_atomics atomic_ref ----------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -13,7 +13,7 @@
 #include <CL/sycl/atomic.hpp>
 #include <CL/sycl/detail/defines.hpp>
 #include <CL/sycl/detail/helpers.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <atomic>
@@ -27,14 +27,14 @@ namespace sycl {
 template <typename pointerT, access::address_space AddressSpace>
 class multi_ptr;
 
-namespace intel {
+namespace ONEAPI {
 namespace detail {
 
-// Import from detail:: into intel::detail:: to improve readability later
+// Import from detail:: into ONEAPI::detail:: to improve readability later
 using namespace ::cl::sycl::detail;
 
-using memory_order = cl::sycl::intel::memory_order;
-using memory_scope = cl::sycl::intel::memory_scope;
+using memory_order = cl::sycl::ONEAPI::memory_order;
+using memory_scope = cl::sycl::ONEAPI::memory_scope;
 
 template <typename T>
 using IsValidAtomicType =
@@ -127,14 +127,14 @@ class atomic_ref_base {
       detail::IsValidAtomicType<T>::value,
       "Invalid atomic type.  Valid types are arithmetic and pointer types");
   static_assert(!std::is_same<T, bool>::value,
-                "intel::atomic_ref does not support bool type");
+                "ONEAPI::atomic_ref does not support bool type");
   static_assert(!(std::is_same<T, char>::value ||
                   std::is_same<T, signed char>::value ||
                   std::is_same<T, unsigned char>::value),
-                "intel::atomic_ref does not support char type");
+                "ONEAPI::atomic_ref does not support char type");
   static_assert(!(std::is_same<T, short>::value ||
                   std::is_same<T, unsigned short>::value),
-                "intel::atomic_ref does not support short type");
+                "ONEAPI::atomic_ref does not support short type");
   static_assert(detail::IsValidAtomicAddressSpace<AddressSpace>::value,
                 "Invalid atomic address_space.  Valid address spaces are: "
                 "global_space, local_space, global_device_space");
@@ -651,6 +651,6 @@ class atomic_ref : public detail::atomic_ref_impl<T, DefaultOrder, DefaultScope,
                                 AddressSpace>::atomic_ref_impl;
 };
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/builtins.hpp b/sycl/include/CL/sycl/ONEAPI/builtins.hpp
similarity index 96%
rename from sycl/include/CL/sycl/intel/builtins.hpp
rename to sycl/include/CL/sycl/ONEAPI/builtins.hpp
index a59258a2290ba..50ec284a70d39 100644
--- a/sycl/include/CL/sycl/intel/builtins.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/builtins.hpp
@@ -18,8 +18,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
-namespace experimental {
+namespace ONEAPI {
 
 // Provides functionality to print data from kernels in a C way:
 // - On non-host devices this function is directly mapped to printf from
@@ -67,8 +66,7 @@ int printf(const CONSTANT_AS char *__format, Args... args) {
 #endif
 }
 
-} // namespace experimental
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
 
diff --git a/sycl/include/CL/sycl/intel/function_pointer.hpp b/sycl/include/CL/sycl/ONEAPI/function_pointer.hpp
similarity index 98%
rename from sycl/include/CL/sycl/intel/function_pointer.hpp
rename to sycl/include/CL/sycl/ONEAPI/function_pointer.hpp
index f812be911b788..3ccbdf636ec99 100644
--- a/sycl/include/CL/sycl/intel/function_pointer.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/function_pointer.hpp
@@ -21,7 +21,7 @@ namespace detail {
 __SYCL_EXPORT cl_ulong getDeviceFunctionPointerImpl(device &D, program &P,
                                                     const char *FuncName);
 }
-namespace intel {
+namespace ONEAPI {
 
 // This is a preview extension implementation, intended to provide early
 // access to a feature for review and community feedback.
@@ -83,6 +83,6 @@ device_func_ptr_holder_t get_device_func_ptr(FuncType F, const char *FuncName,
 
   return sycl::detail::getDeviceFunctionPointerImpl(D, P, FuncName);
 }
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/functional.hpp b/sycl/include/CL/sycl/ONEAPI/functional.hpp
similarity index 85%
rename from sycl/include/CL/sycl/intel/functional.hpp
rename to sycl/include/CL/sycl/ONEAPI/functional.hpp
index ee4ed21b33ffd..ab16a694b00ed 100644
--- a/sycl/include/CL/sycl/intel/functional.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/functional.hpp
@@ -11,7 +11,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 
 template <typename T = void> struct minimum {
   T operator()(const T &lhs, const T &rhs) const {
@@ -57,7 +57,7 @@ template <typename T = void> using bit_or = std::bit_or<T>;
 template <typename T = void> using bit_xor = std::bit_xor<T>;
 template <typename T = void> using bit_and = std::bit_and<T>;
 
-} // namespace intel
+} // namespace ONEAPI
 
 #ifdef __SYCL_DEVICE_ONLY__
 namespace detail {
@@ -93,15 +93,15 @@ struct GroupOpTag<T, detail::enable_if_t<detail::is_sgenfloat<T>::value>> {
     return Ret;                                                                \
   }
 
-__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, intel::minimum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, intel::minimum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, intel::minimum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, intel::maximum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, intel::maximum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, intel::maximum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, intel::plus<T>)
-__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, intel::plus<T>)
-__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, intel::plus<T>)
+__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, ONEAPI::minimum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, ONEAPI::minimum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, ONEAPI::minimum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, ONEAPI::maximum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, ONEAPI::maximum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, ONEAPI::maximum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, ONEAPI::plus<T>)
+__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, ONEAPI::plus<T>)
+__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, ONEAPI::plus<T>)
 
 #undef __SYCL_CALC_OVERLOAD
 
diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
similarity index 93%
rename from sycl/include/CL/sycl/intel/group_algorithm.hpp
rename to sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
index 932a53ba07675..e68a1379765f0 100644
--- a/sycl/include/CL/sycl/intel/group_algorithm.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
@@ -13,10 +13,10 @@
 #include <CL/sycl/detail/spirv.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
 #include <CL/sycl/group.hpp>
-#include <CL/sycl/intel/functional.hpp>
-#include <CL/sycl/intel/sub_group.hpp>
+#include <CL/sycl/ONEAPI/functional.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 
-#ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
+#ifndef __DISABLE_SYCL_ONEAPI_GROUP_ALGORITHMS__
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 namespace detail {
@@ -32,7 +32,7 @@ template <> inline size_t get_local_linear_range<group<3>>(group<3> g) {
   return g.get_local_range(0) * g.get_local_range(1) * g.get_local_range(2);
 }
 template <>
-inline size_t get_local_linear_range<intel::sub_group>(intel::sub_group g) {
+inline size_t get_local_linear_range<ONEAPI::sub_group>(ONEAPI::sub_group g) {
   return g.get_local_range()[0];
 }
 
@@ -53,8 +53,8 @@ __SYCL_GROUP_GET_LOCAL_LINEAR_ID(3);
 #endif // __SYCL_DEVICE_ONLY__
 
 template <>
-inline intel::sub_group::linear_id_type
-get_local_linear_id<intel::sub_group>(intel::sub_group g) {
+inline ONEAPI::sub_group::linear_id_type
+get_local_linear_id<ONEAPI::sub_group>(ONEAPI::sub_group g) {
   return g.get_local_id()[0];
 }
 
@@ -79,22 +79,22 @@ template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) {
 
 template <typename T, class BinaryOperation> struct identity {};
 
-template <typename T, typename V> struct identity<T, intel::plus<V>> {
+template <typename T, typename V> struct identity<T, ONEAPI::plus<V>> {
   static constexpr T value = 0;
 };
 
-template <typename T, typename V> struct identity<T, intel::minimum<V>> {
+template <typename T, typename V> struct identity<T, ONEAPI::minimum<V>> {
   static constexpr T value = (std::numeric_limits<T>::max)();
 };
 
-template <typename T, typename V> struct identity<T, intel::maximum<V>> {
+template <typename T, typename V> struct identity<T, ONEAPI::maximum<V>> {
   static constexpr T value = std::numeric_limits<T>::lowest();
 };
 
 template <typename T>
 using native_op_list =
-    type_list<intel::plus<T>, intel::bit_or<T>, intel::bit_xor<T>,
-              intel::bit_and<T>, intel::maximum<T>, intel::minimum<T>>;
+    type_list<ONEAPI::plus<T>, ONEAPI::bit_or<T>, ONEAPI::bit_xor<T>,
+              ONEAPI::bit_and<T>, ONEAPI::maximum<T>, ONEAPI::minimum<T>>;
 
 template <typename T, typename BinaryOperation> struct is_native_op {
   static constexpr bool value =
@@ -123,7 +123,7 @@ Function for_each(Group g, Ptr first, Ptr last, Function f) {
 
 } // namespace detail
 
-namespace intel {
+namespace ONEAPI {
 
 // EnableIf shorthands for algorithms that depend only on type
 template <typename T>
@@ -163,7 +163,7 @@ using EnableIfIsNonNativeOp = cl::sycl::detail::enable_if_t<
 template <typename Group> bool all_of(Group, bool pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return sycl::detail::spirv::GroupAll<Group>(pred);
 #else
@@ -177,7 +177,7 @@ template <typename Group, typename T, class Predicate>
 bool all_of(Group g, T x, Predicate pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   return all_of(g, pred(x));
 }
 
@@ -186,7 +186,7 @@ EnableIfIsPointer<Ptr, bool> all_of(Group g, Ptr first, Ptr last,
                                     Predicate pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   bool partial = true;
   sycl::detail::for_each(
@@ -206,7 +206,7 @@ EnableIfIsPointer<Ptr, bool> all_of(Group g, Ptr first, Ptr last,
 template <typename Group> bool any_of(Group, bool pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return sycl::detail::spirv::GroupAny<Group>(pred);
 #else
@@ -220,7 +220,7 @@ template <typename Group, typename T, class Predicate>
 bool any_of(Group g, T x, Predicate pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   return any_of(g, pred(x));
 }
 
@@ -230,7 +230,7 @@ EnableIfIsPointer<Ptr, bool> any_of(Group g, Ptr first, Ptr last,
 #ifdef __SYCL_DEVICE_ONLY__
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   bool partial = false;
   sycl::detail::for_each(
       g, first, last,
@@ -249,7 +249,7 @@ EnableIfIsPointer<Ptr, bool> any_of(Group g, Ptr first, Ptr last,
 template <typename Group> bool none_of(Group, bool pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return sycl::detail::spirv::GroupAll<Group>(!pred);
 #else
@@ -263,7 +263,7 @@ template <typename Group, typename T, class Predicate>
 bool none_of(Group g, T x, Predicate pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   return none_of(g, pred(x));
 }
 
@@ -273,7 +273,7 @@ EnableIfIsPointer<Ptr, bool> none_of(Group g, Ptr first, Ptr last,
 #ifdef __SYCL_DEVICE_ONLY__
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   return !any_of(g, first, last, pred);
 #else
   (void)g;
@@ -290,7 +290,7 @@ EnableIfIsScalarArithmetic<T> broadcast(Group, T x,
                                         typename Group::id_type local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return sycl::detail::spirv::GroupBroadcast<Group>(x, local_id);
 #else
@@ -306,7 +306,7 @@ EnableIfIsVectorArithmetic<T> broadcast(Group g, T x,
                                         typename Group::id_type local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   T result;
   for (int s = 0; s < x.get_size(); ++s) {
@@ -327,7 +327,7 @@ EnableIfIsScalarArithmetic<T>
 broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return broadcast(
       g, x,
@@ -346,7 +346,7 @@ EnableIfIsVectorArithmetic<T>
 broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   T result;
   for (int s = 0; s < x.get_size(); ++s) {
@@ -366,7 +366,7 @@ template <typename Group, typename T>
 EnableIfIsScalarArithmetic<T> broadcast(Group g, T x) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return broadcast(g, x, 0);
 #else
@@ -381,7 +381,7 @@ template <typename Group, typename T>
 EnableIfIsVectorArithmetic<T> broadcast(Group g, T x) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   T result;
   for (int s = 0; s < x.get_size(); ++s) {
@@ -401,7 +401,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 reduce(Group, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x, x)), T>::value ||
@@ -423,7 +423,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 reduce(Group g, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x[0], x[0])),
@@ -443,7 +443,7 @@ EnableIfIsNonNativeOp<T, BinaryOperation> reduce(Group g, T x,
                                                  BinaryOperation op) {
   static_assert(sycl::detail::is_sub_group<Group>::value,
                 "reduce algorithm with user-defined types and operators"
-                "only supports intel::sub_group class.");
+                "only supports ONEAPI::sub_group class.");
   T result = x;
   for (int mask = 1; mask < g.get_max_local_range()[0]; mask *= 2) {
     T tmp = g.shuffle_xor(result, id<1>(mask));
@@ -459,7 +459,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 reduce(Group g, V x, T init, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init, x)), T>::value ||
@@ -480,7 +480,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 reduce(Group g, V x, T init, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init[0], x[0])),
@@ -506,7 +506,7 @@ EnableIfIsNonNativeOp<T, BinaryOperation> reduce(Group g, V x, T init,
                                                  BinaryOperation op) {
   static_assert(sycl::detail::is_sub_group<Group>::value,
                 "reduce algorithm with user-defined types and operators"
-                "only supports intel::sub_group class.");
+                "only supports ONEAPI::sub_group class.");
   T result = x;
   for (int mask = 1; mask < g.get_max_local_range()[0]; mask *= 2) {
     T tmp = g.shuffle_xor(result, id<1>(mask));
@@ -522,7 +522,7 @@ EnableIfIsPointer<Ptr, typename Ptr::element_type>
 reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(*first, *first)),
@@ -553,7 +553,7 @@ EnableIfIsPointer<Ptr, T> reduce(Group g, Ptr first, Ptr last, T init,
                                  BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init, *first)), T>::value ||
@@ -581,7 +581,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 exclusive_scan(Group, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(std::is_same<decltype(binary_op(x, x)), T>::value ||
                     (std::is_same<T, half>::value &&
@@ -602,7 +602,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 exclusive_scan(Group g, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x[0], x[0])),
@@ -622,7 +622,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init[0], x[0])),
@@ -642,7 +642,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(std::is_same<decltype(binary_op(init, x)), T>::value ||
                     (std::is_same<T, half>::value &&
@@ -673,7 +673,7 @@ exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init,
                BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(*first, *first)), T>::value ||
@@ -735,7 +735,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 inclusive_scan(Group g, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x[0], x[0])),
@@ -755,7 +755,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 inclusive_scan(Group, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(std::is_same<decltype(binary_op(x, x)), T>::value ||
                     (std::is_same<T, half>::value &&
@@ -776,7 +776,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(std::is_same<decltype(binary_op(init, x)), T>::value ||
                     (std::is_same<T, half>::value &&
@@ -799,7 +799,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init[0], x[0])), T>::value ||
@@ -820,7 +820,7 @@ inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result,
                BinaryOperation binary_op, T init) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init, *first)), T>::value ||
@@ -878,7 +878,7 @@ EnableIfIsPointer<InPtr, OutPtr> inclusive_scan(Group g, InPtr first,
 template <typename Group> bool leader(Group g) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   typename Group::linear_id_type linear_id =
       sycl::detail::get_local_linear_id(g);
@@ -890,7 +890,7 @@ template <typename Group> bool leader(Group g) {
 #endif
 }
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
-#endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
+#endif // __DISABLE_SYCL_ONEAPI_GROUP_ALGORITHMS__
diff --git a/sycl/include/CL/sycl/intel/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
similarity index 96%
rename from sycl/include/CL/sycl/intel/reduction.hpp
rename to sycl/include/CL/sycl/ONEAPI/reduction.hpp
index 26adff47778e9..d769d448bf561 100644
--- a/sycl/include/CL/sycl/intel/reduction.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
@@ -10,14 +10,16 @@
 
 #include <CL/sycl/accessor.hpp>
 #include <CL/sycl/handler.hpp>
-#include <CL/sycl/intel/group_algorithm.hpp>
+#include <CL/sycl/ONEAPI/group_algorithm.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 
 namespace detail {
 
+using cl::sycl::detail::queue_impl;
+
 __SYCL_EXPORT size_t reduGetMaxWGSize(shared_ptr_class<queue_impl> Queue,
                                       size_t LocalMemBytesPerWorkItem);
 __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize,
@@ -33,8 +35,8 @@ using cl::sycl::detail::remove_AS;
 
 template <typename T, class BinaryOperation>
 using IsReduPlus = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::plus<T>>::value ||
-    std::is_same<BinaryOperation, intel::plus<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::plus<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::plus<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduMultiplies = detail::bool_constant<
@@ -43,28 +45,28 @@ using IsReduMultiplies = detail::bool_constant<
 
 template <typename T, class BinaryOperation>
 using IsReduMinimum = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::minimum<T>>::value ||
-    std::is_same<BinaryOperation, intel::minimum<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::minimum<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::minimum<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduMaximum = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::maximum<T>>::value ||
-    std::is_same<BinaryOperation, intel::maximum<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::maximum<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::maximum<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduBitOR = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::bit_or<T>>::value ||
-    std::is_same<BinaryOperation, intel::bit_or<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::bit_or<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::bit_or<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduBitXOR = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::bit_xor<T>>::value ||
-    std::is_same<BinaryOperation, intel::bit_xor<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::bit_xor<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::bit_xor<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduBitAND = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::bit_and<T>>::value ||
-    std::is_same<BinaryOperation, intel::bit_and<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::bit_and<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::bit_and<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduOptForFastAtomicFetch =
@@ -171,7 +173,7 @@ class reducer {
 /// using those operations, which are based on functionality provided by
 /// sycl::atomic class.
 ///
-/// For example, it is known that 0 is identity for intel::plus operations
+/// For example, it is known that 0 is identity for ONEAPI::plus operations
 /// accepting native scalar types to which scalar 0 is convertible.
 /// Also, for int32/64 types the atomic_combine() is lowered to
 /// sycl::atomic::fetch_add().
@@ -313,7 +315,7 @@ class reducer<T, BinaryOperation,
         .fetch_and(MValue);
   }
 
-  /// Atomic MIN operation: *ReduVarPtr = intel::minimum(*ReduVarPtr, MValue);
+  /// Atomic MIN operation: *ReduVarPtr = ONEAPI::minimum(*ReduVarPtr, MValue);
   template <typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               (is_geninteger32bit<T>::value || is_geninteger64bit<T>::value) &&
@@ -323,7 +325,7 @@ class reducer<T, BinaryOperation,
         .fetch_min(MValue);
   }
 
-  /// Atomic MAX operation: *ReduVarPtr = intel::maximum(*ReduVarPtr, MValue);
+  /// Atomic MAX operation: *ReduVarPtr = ONEAPI::maximum(*ReduVarPtr, MValue);
   template <typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               (is_geninteger32bit<T>::value || is_geninteger64bit<T>::value) &&
@@ -609,11 +611,11 @@ struct get_reduction_aux_kernel_name_t {
 /// Implements a command group function that enqueues a kernel that calls
 /// user's lambda function KernelFunc and also does one iteration of reduction
 /// of elements computed in user's lambda function.
-/// This version uses intel::reduce() algorithm to reduce elements in each
+/// This version uses ONEAPI::reduce() algorithm to reduce elements in each
 /// of work-groups, then it calls fast sycl atomic operations to update
 /// user's reduction variable.
 ///
-/// Briefly: calls user's lambda, intel::reduce() + atomic, INT + ADD/MIN/MAX.
+/// Briefly: calls user's lambda, ONEAPI::reduce() + atomic, INT + ADD/MIN/MAX.
 template <typename KernelName, typename KernelType, int Dims, class Reduction,
           bool UniformWG, typename OutputT>
 enable_if_t<Reduction::has_fast_reduce && Reduction::has_fast_atomics>
@@ -632,7 +634,7 @@ reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
         (UniformWG || NDIt.get_global_linear_id() < NWorkItems)
             ? Reducer.MValue
             : Reducer.getIdentity();
-    Reducer.MValue = intel::reduce(NDIt.get_group(), Val, BOp);
+    Reducer.MValue = ONEAPI::reduce(NDIt.get_group(), Val, BOp);
     if (NDIt.get_local_linear_id() == 0)
       Reducer.atomic_combine(Reduction::getOutPointer(Out));
   });
@@ -726,11 +728,11 @@ reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
 /// Implements a command group function that enqueues a kernel that
 /// calls user's lambda function and does one iteration of reduction
 /// of elements in each of work-groups.
-/// This version uses intel::reduce() algorithm to reduce elements in each
+/// This version uses ONEAPI::reduce() algorithm to reduce elements in each
 /// of work-groups. At the end of each work-groups the partial sum is written
 /// to a global buffer.
 ///
-/// Briefly: user's lambda, intel:reduce(), FP + ADD/MIN/MAX.
+/// Briefly: user's lambda, ONEAPI:reduce(), FP + ADD/MIN/MAX.
 template <typename KernelName, typename KernelType, int Dims, class Reduction,
           bool UniformWG, typename OutputT>
 enable_if_t<Reduction::has_fast_reduce && !Reduction::has_fast_atomics>
@@ -760,7 +762,7 @@ reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
             ? Reducer.MValue
             : Reducer.getIdentity();
     typename Reduction::binary_operation BOp;
-    PSum = intel::reduce(NDIt.get_group(), PSum, BOp);
+    PSum = ONEAPI::reduce(NDIt.get_group(), PSum, BOp);
     if (NDIt.get_local_linear_id() == 0) {
       if (IsUpdateOfUserVar)
         PSum = BOp(*(Reduction::getOutPointer(Out)), PSum);
@@ -873,11 +875,11 @@ reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
 
 /// Implements a command group function that enqueues a kernel that does one
 /// iteration of reduction of elements in each of work-groups.
-/// This version uses intel::reduce() algorithm to reduce elements in each
+/// This version uses ONEAPI::reduce() algorithm to reduce elements in each
 /// of work-groups. At the end of each work-groups the partial sum is written
 /// to a global buffer.
 ///
-/// Briefly: aux kernel, intel:reduce(), reproducible results, FP + ADD/MIN/MAX
+/// Briefly: aux kernel, ONEAPI:reduce(), reproducible results, FP + ADD/MIN/MAX
 template <typename KernelName, typename KernelType, bool UniformWG,
           class Reduction, typename InputT, typename OutputT>
 enable_if_t<Reduction::has_fast_reduce && !Reduction::has_fast_atomics>
@@ -897,7 +899,7 @@ reduAuxCGFuncImpl(handler &CGH, size_t NWorkItems, size_t NWorkGroups,
         (UniformWG || (GID < NWorkItems))
             ? In[GID]
             : Reduction::reducer_type::getIdentity();
-    PSum = intel::reduce(NDIt.get_group(), PSum, BOp);
+    PSum = ONEAPI::reduce(NDIt.get_group(), PSum, BOp);
     if (NDIt.get_local_linear_id() == 0) {
       if (IsUpdateOfUserVar)
         PSum = BOp(*(Reduction::getOutPointer(Out)), PSum);
@@ -1074,6 +1076,6 @@ reduction(T *VarPtr, BinaryOperation) {
                                 access::mode::read_write>(VarPtr);
 }
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/experimental/spec_constant.hpp b/sycl/include/CL/sycl/ONEAPI/spec_constant.hpp
similarity index 93%
rename from sycl/include/CL/sycl/experimental/spec_constant.hpp
rename to sycl/include/CL/sycl/ONEAPI/spec_constant.hpp
index 104137fdba9c5..ee10513b67ad9 100644
--- a/sycl/include/CL/sycl/experimental/spec_constant.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/spec_constant.hpp
@@ -1,4 +1,4 @@
-//==----- spec_constant.hpp - SYCL public experimental API header file -----==//
+//==----------- spec_constant.hpp - SYCL public ONEAPI API header file -----==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -22,7 +22,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace experimental {
+namespace ONEAPI {
 
 class spec_const_error : public compile_program_error {
   using compile_program_error::compile_program_error;
@@ -56,6 +56,6 @@ template <typename T, typename ID = T> class spec_constant {
   }
 };
 
-} // namespace experimental
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/sub_group.hpp b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp
similarity index 97%
rename from sycl/include/CL/sycl/intel/sub_group.hpp
rename to sycl/include/CL/sycl/ONEAPI/sub_group.hpp
index 2c65f08218990..513635f8ee95b 100644
--- a/sycl/include/CL/sycl/intel/sub_group.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp
@@ -17,7 +17,7 @@
 #include <CL/sycl/detail/spirv.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
 #include <CL/sycl/id.hpp>
-#include <CL/sycl/intel/functional.hpp>
+#include <CL/sycl/ONEAPI/functional.hpp>
 #include <CL/sycl/range.hpp>
 #include <CL/sycl/types.hpp>
 
@@ -96,7 +96,7 @@ void store(multi_ptr<T, Space> dst, const vec<T, N> &x) {
 
 } // namespace detail
 
-namespace intel {
+namespace ONEAPI {
 
 struct sub_group {
 
@@ -451,7 +451,7 @@ struct sub_group {
   /* --- deprecated collective functions --- */
   template <typename T>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::broadcast instead.")
+                    "sycl::ONEAPI::broadcast instead.")
   EnableIfIsScalarArithmetic<T> broadcast(T x, id<1> local_id) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
@@ -465,7 +465,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::reduce instead.")
+                    "sycl::ONEAPI::reduce instead.")
   EnableIfIsScalarArithmetic<T> reduce(T x, BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::detail::calc<T, __spv::GroupOperation::Reduce,
@@ -481,7 +481,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::reduce instead.")
+                    "sycl::ONEAPI::reduce instead.")
   EnableIfIsScalarArithmetic<T> reduce(T x, T init, BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return op(init, reduce(x, op));
@@ -496,7 +496,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::exclusive_scan instead.")
+                    "sycl::ONEAPI::exclusive_scan instead.")
   EnableIfIsScalarArithmetic<T> exclusive_scan(T x, BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::detail::calc<T, __spv::GroupOperation::ExclusiveScan,
@@ -512,7 +512,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::exclusive_scan instead.")
+                    "sycl::ONEAPI::exclusive_scan instead.")
   EnableIfIsScalarArithmetic<T> exclusive_scan(T x, T init,
                                                BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
@@ -535,7 +535,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::inclusive_scan instead.")
+                    "sycl::ONEAPI::inclusive_scan instead.")
   EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::detail::calc<T, __spv::GroupOperation::InclusiveScan,
@@ -551,7 +551,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::inclusive_scan instead.")
+                    "sycl::ONEAPI::inclusive_scan instead.")
   EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op,
                                                T init) const {
 #ifdef __SYCL_DEVICE_ONLY__
@@ -572,6 +572,6 @@ struct sub_group {
   template <int dimensions> friend class cl::sycl::nd_item;
   sub_group() = default;
 };
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp
index d662e2afc7880..5acf77a918341 100644
--- a/sycl/include/CL/sycl/detail/spirv.hpp
+++ b/sycl/include/CL/sycl/detail/spirv.hpp
@@ -12,14 +12,14 @@
 #include <CL/__spirv/spirv_vars.hpp>
 #include <CL/sycl/detail/generic_type_traits.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 
 #ifdef __SYCL_DEVICE_ONLY__
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 struct sub_group;
-} // namespace intel
+} // namespace ONEAPI
 namespace detail {
 namespace spirv {
 
@@ -29,7 +29,7 @@ template <int Dimensions> struct group_scope<group<Dimensions>> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup;
 };
 
-template <> struct group_scope<::cl::sycl::intel::sub_group> {
+template <> struct group_scope<::cl::sycl::ONEAPI::sub_group> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup;
 };
 
@@ -87,23 +87,23 @@ T GroupBroadcast(T x, id<Dimensions> local_id) {
 // Single happens-before means semantics should always apply to all spaces
 // Although consume is unsupported, forwarding to acquire is valid
 static inline constexpr __spv::MemorySemanticsMask::Flag
-getMemorySemanticsMask(intel::memory_order Order) {
+getMemorySemanticsMask(ONEAPI::memory_order Order) {
   __spv::MemorySemanticsMask::Flag SpvOrder = __spv::MemorySemanticsMask::None;
   switch (Order) {
-  case intel::memory_order::relaxed:
+  case ONEAPI::memory_order::relaxed:
     SpvOrder = __spv::MemorySemanticsMask::None;
     break;
-  case intel::memory_order::__consume_unsupported:
-  case intel::memory_order::acquire:
+  case ONEAPI::memory_order::__consume_unsupported:
+  case ONEAPI::memory_order::acquire:
     SpvOrder = __spv::MemorySemanticsMask::Acquire;
     break;
-  case intel::memory_order::release:
+  case ONEAPI::memory_order::release:
     SpvOrder = __spv::MemorySemanticsMask::Release;
     break;
-  case intel::memory_order::acq_rel:
+  case ONEAPI::memory_order::acq_rel:
     SpvOrder = __spv::MemorySemanticsMask::AcquireRelease;
     break;
-  case intel::memory_order::seq_cst:
+  case ONEAPI::memory_order::seq_cst:
     SpvOrder = __spv::MemorySemanticsMask::SequentiallyConsistent;
     break;
   }
@@ -113,17 +113,17 @@ getMemorySemanticsMask(intel::memory_order Order) {
       __spv::MemorySemanticsMask::CrossWorkgroupMemory);
 }
 
-static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) {
+static inline constexpr __spv::Scope::Flag getScope(ONEAPI::memory_scope Scope) {
   switch (Scope) {
-  case intel::memory_scope::work_item:
+  case ONEAPI::memory_scope::work_item:
     return __spv::Scope::Invocation;
-  case intel::memory_scope::sub_group:
+  case ONEAPI::memory_scope::sub_group:
     return __spv::Scope::Subgroup;
-  case intel::memory_scope::work_group:
+  case ONEAPI::memory_scope::work_group:
     return __spv::Scope::Workgroup;
-  case intel::memory_scope::device:
+  case ONEAPI::memory_scope::device:
     return __spv::Scope::Device;
-  case intel::memory_scope::system:
+  case ONEAPI::memory_scope::system:
     return __spv::Scope::CrossDevice;
   }
 }
@@ -131,8 +131,8 @@ static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) {
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
 AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
-                      intel::memory_scope Scope, intel::memory_order Success,
-                      intel::memory_order Failure, T Desired, T Expected) {
+                      ONEAPI::memory_scope Scope, ONEAPI::memory_order Success,
+                      ONEAPI::memory_order Failure, T Desired, T Expected) {
   auto SPIRVSuccess = getMemorySemanticsMask(Success);
   auto SPIRVFailure = getMemorySemanticsMask(Failure);
   auto SPIRVScope = getScope(Scope);
@@ -144,8 +144,8 @@ AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
 AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
-                      intel::memory_scope Scope, intel::memory_order Success,
-                      intel::memory_order Failure, T Desired, T Expected) {
+                      ONEAPI::memory_scope Scope, ONEAPI::memory_order Success,
+                      ONEAPI::memory_order Failure, T Desired, T Expected) {
   using I = detail::make_unsinged_integer_t<T>;
   auto SPIRVSuccess = getMemorySemanticsMask(Success);
   auto SPIRVFailure = getMemorySemanticsMask(Failure);
@@ -162,8 +162,8 @@ AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-           intel::memory_order Order) {
+AtomicLoad(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+           ONEAPI::memory_order Order) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -172,8 +172,8 @@ AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
-AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-           intel::memory_order Order) {
+AtomicLoad(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+           ONEAPI::memory_order Order) {
   using I = detail::make_unsinged_integer_t<T>;
   auto *PtrInt =
       reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
@@ -186,8 +186,8 @@ AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value>
-AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-            intel::memory_order Order, T Value) {
+AtomicStore(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+            ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -196,8 +196,8 @@ AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_floating_point<T>::value>
-AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-            intel::memory_order Order, T Value) {
+AtomicStore(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+            ONEAPI::memory_order Order, T Value) {
   using I = detail::make_unsinged_integer_t<T>;
   auto *PtrInt =
       reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
@@ -210,8 +210,8 @@ AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-               intel::memory_order Order, T Value) {
+AtomicExchange(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+               ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -220,8 +220,8 @@ AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
-AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-               intel::memory_order Order, T Value) {
+AtomicExchange(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+               ONEAPI::memory_order Order, T Value) {
   using I = detail::make_unsinged_integer_t<T>;
   auto *PtrInt =
       reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
@@ -236,8 +236,8 @@ AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicIAdd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-           intel::memory_order Order, T Value) {
+AtomicIAdd(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+           ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -246,8 +246,8 @@ AtomicIAdd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicISub(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-           intel::memory_order Order, T Value) {
+AtomicISub(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+           ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -256,8 +256,8 @@ AtomicISub(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicAnd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-          intel::memory_order Order, T Value) {
+AtomicAnd(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+          ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -266,8 +266,8 @@ AtomicAnd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicOr(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-         intel::memory_order Order, T Value) {
+AtomicOr(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+         ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -276,8 +276,8 @@ AtomicOr(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicXor(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-          intel::memory_order Order, T Value) {
+AtomicXor(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+          ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -286,8 +286,8 @@ AtomicXor(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicMin(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-          intel::memory_order Order, T Value) {
+AtomicMin(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+          ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -296,8 +296,8 @@ AtomicMin(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicMax(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-          intel::memory_order Order, T Value) {
+AtomicMax(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+          ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
diff --git a/sycl/include/CL/sycl/detail/type_traits.hpp b/sycl/include/CL/sycl/detail/type_traits.hpp
index 3f52acc8a2de2..df480f58f99ff 100644
--- a/sycl/include/CL/sycl/detail/type_traits.hpp
+++ b/sycl/include/CL/sycl/detail/type_traits.hpp
@@ -18,9 +18,9 @@
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 template <int Dimensions> class group;
-namespace intel {
+namespace ONEAPI {
 struct sub_group;
-} // namespace intel
+} // namespace ONEAPI
 namespace detail {
 namespace half_impl {
 class half;
@@ -313,7 +313,7 @@ struct is_group<group<Dimensions>> : std::true_type {};
 
 template <typename T> struct is_sub_group : std::false_type {};
 
-template <> struct is_sub_group<intel::sub_group> : std::true_type {};
+template <> struct is_sub_group<ONEAPI::sub_group> : std::true_type {};
 
 template <typename T>
 struct is_generic_group
diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp
index b4504489662c0..6eff9c0144d21 100644
--- a/sycl/include/CL/sycl/handler.hpp
+++ b/sycl/include/CL/sycl/handler.hpp
@@ -172,7 +172,7 @@ checkValueRange(const T &V) {
 
 } // namespace detail
 
-namespace intel {
+namespace ONEAPI {
 namespace detail {
 template <typename T, class BinaryOperation, int Dims, bool IsUSM,
           access::mode AccMode, access::placeholder IsPlaceholder>
@@ -201,7 +201,7 @@ __SYCL_EXPORT size_t reduGetMaxWGSize(shared_ptr_class<queue_impl> Queue,
                                       size_t LocalMemBytesPerWorkItem);
 
 } // namespace detail
-} // namespace intel
+} // namespace ONEAPI
 
 /// Command group handler class.
 ///
@@ -1003,7 +1003,7 @@ class __SYCL_EXPORT handler {
   detail::enable_if_t<Reduction::accessor_mode == access::mode::read_write &&
                       Reduction::has_fast_atomics && !Reduction::is_usm>
   parallel_for(nd_range<Dims> Range, Reduction Redu, KernelType KernelFunc) {
-    intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
                                           Redu.getUserAccessor());
   }
 
@@ -1016,7 +1016,7 @@ class __SYCL_EXPORT handler {
   detail::enable_if_t<Reduction::accessor_mode == access::mode::read_write &&
                       Reduction::has_fast_atomics && Reduction::is_usm>
   parallel_for(nd_range<Dims> Range, Reduction Redu, KernelType KernelFunc) {
-    intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
                                           Redu.getUSMPointer());
   }
 
@@ -1037,7 +1037,7 @@ class __SYCL_EXPORT handler {
   parallel_for(nd_range<Dims> Range, Reduction Redu, KernelType KernelFunc) {
     shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
     auto RWAcc = Redu.getReadWriteScalarAcc(*this);
-    intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
                                           RWAcc);
     this->finalize();
 
@@ -1065,7 +1065,7 @@ class __SYCL_EXPORT handler {
   /// TODO: Need to handle more than 1 reduction in parallel_for().
   /// TODO: Support HOST. The kernels called by this parallel_for() may use
   /// some functionality that is not yet supported on HOST such as:
-  /// barrier(), and intel::reduce() that also may be used in more
+  /// barrier(), and ONEAPI::reduce() that also may be used in more
   /// optimized implementations waiting for their turn of code-review.
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims, typename Reduction>
@@ -1097,14 +1097,14 @@ class __SYCL_EXPORT handler {
     // TODO: currently the maximal work group size is determined for the given
     // queue/device, while it may be safer to use queries to the kernel compiled
     // for the device.
-    size_t MaxWGSize = intel::detail::reduGetMaxWGSize(MQueue, OneElemSize);
+    size_t MaxWGSize = ONEAPI::detail::reduGetMaxWGSize(MQueue, OneElemSize);
     if (Range.get_local_range().size() > MaxWGSize)
       throw sycl::runtime_error("The implementation handling parallel_for with"
                                 " reduction requires smaller work group size.",
                                 PI_INVALID_WORK_GROUP_SIZE);
 
     // 1. Call the kernel that includes user's lambda function.
-    intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu);
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu);
     shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
     this->finalize();
 
@@ -1126,7 +1126,7 @@ class __SYCL_EXPORT handler {
       handler AuxHandler(QueueCopy, MIsHost);
       AuxHandler.saveCodeLoc(MCodeLoc);
 
-      NWorkItems = intel::detail::reduAuxCGFunc<KernelName, KernelType>(
+      NWorkItems = ONEAPI::detail::reduAuxCGFunc<KernelName, KernelType>(
           AuxHandler, NWorkItems, MaxWGSize, Redu);
       MLastEvent = AuxHandler.finalize();
     } // end while (NWorkItems > 1)
@@ -1861,7 +1861,7 @@ class __SYCL_EXPORT handler {
   // in handler from reduction_impl methods.
   template <typename T, class BinaryOperation, int Dims, bool IsUSM,
             access::mode AccMode, access::placeholder IsPlaceholder>
-  friend class intel::detail::reduction_impl;
+  friend class ONEAPI::detail::reduction_impl;
 
   friend void detail::associateWithHandler(handler &,
                                            detail::AccessorBaseHost *,
diff --git a/sycl/include/CL/sycl/nd_item.hpp b/sycl/include/CL/sycl/nd_item.hpp
index 62abba368dc7f..37c61b03525d3 100644
--- a/sycl/include/CL/sycl/nd_item.hpp
+++ b/sycl/include/CL/sycl/nd_item.hpp
@@ -14,9 +14,9 @@
 #include <CL/sycl/detail/helpers.hpp>
 #include <CL/sycl/group.hpp>
 #include <CL/sycl/id.hpp>
-#include <CL/sycl/intel/sub_group.hpp>
 #include <CL/sycl/item.hpp>
 #include <CL/sycl/nd_range.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/range.hpp>
 
 #include <cstddef>
@@ -67,7 +67,7 @@ template <int dimensions = 1> class nd_item {
 
   group<dimensions> get_group() const { return Group; }
 
-  intel::sub_group get_sub_group() const { return intel::sub_group(); }
+  ONEAPI::sub_group get_sub_group() const { return ONEAPI::sub_group(); }
 
   size_t ALWAYS_INLINE get_group(int dimension) const {
     size_t Size = Group[dimension];
diff --git a/sycl/include/CL/sycl/program.hpp b/sycl/include/CL/sycl/program.hpp
index c6dbebf3f45bf..252776ed83ee4 100644
--- a/sycl/include/CL/sycl/program.hpp
+++ b/sycl/include/CL/sycl/program.hpp
@@ -12,9 +12,9 @@
 #include <CL/sycl/detail/export.hpp>
 #include <CL/sycl/detail/kernel_desc.hpp>
 #include <CL/sycl/detail/os_util.hpp>
-#include <CL/sycl/experimental/spec_constant.hpp>
 #include <CL/sycl/info/info_desc.hpp>
 #include <CL/sycl/kernel.hpp>
+#include <CL/sycl/ONEAPI/spec_constant.hpp>
 #include <CL/sycl/stl.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
@@ -307,7 +307,7 @@ class __SYCL_EXPORT program {
   /// \return a specialization constant instance corresponding to given type ID
   ///         passed as a template parameter
   template <typename ID, typename T>
-  experimental::spec_constant<T, ID> set_spec_constant(T Cst) {
+  ONEAPI::spec_constant<T, ID> set_spec_constant(T Cst) {
     constexpr const char *Name = detail::SpecConstantInfo<ID>::getName();
     static_assert(std::is_integral<T>::value ||
                       std::is_floating_point<T>::value,
@@ -315,10 +315,10 @@ class __SYCL_EXPORT program {
 #ifdef __SYCL_DEVICE_ONLY__
     (void)Cst;
     (void)Name;
-    return experimental::spec_constant<T, ID>();
+    return ONEAPI::spec_constant<T, ID>();
 #else
     set_spec_constant_impl(Name, &Cst, sizeof(T));
-    return experimental::spec_constant<T, ID>(Cst);
+    return ONEAPI::spec_constant<T, ID>(Cst);
 #endif // __SYCL_DEVICE_ONLY__
   }
 
diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp
index a73064e455067..f1a73b567c816 100644
--- a/sycl/source/detail/program_impl.cpp
+++ b/sycl/source/detail/program_impl.cpp
@@ -484,7 +484,7 @@ vector_class<device> program_impl::get_info<info::program::devices>() const {
 void program_impl::set_spec_constant_impl(const char *Name, const void *ValAddr,
                                           size_t ValSize) {
   if (MState != program_state::none)
-    throw cl::sycl::experimental::spec_const_error("Invalid program state",
+    throw cl::sycl::ONEAPI::spec_const_error("Invalid program state",
                                                    PI_INVALID_PROGRAM);
   // Reuse cached programs lock as opposed to introducing a new lock.
   auto LockGuard = MContext->getKernelProgramCache().acquireCachedPrograms();
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index a4d7f162c1ab6..ca175f0546c31 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -14,7 +14,7 @@
 #include <CL/sycl/detail/util.hpp>
 #include <CL/sycl/device.hpp>
 #include <CL/sycl/exception.hpp>
-#include <CL/sycl/experimental/spec_constant.hpp>
+#include <CL/sycl/ONEAPI/spec_constant.hpp>
 #include <CL/sycl/stl.hpp>
 #include <detail/context_impl.hpp>
 #include <detail/device_impl.hpp>
@@ -986,7 +986,7 @@ void ProgramManager::flushSpecConstants(const program_impl &Prg,
       auto LockGuard = Ctx->getKernelProgramCache().acquireCachedPrograms();
       auto It = NativePrograms.find(NativePrg);
       if (It == NativePrograms.end())
-        throw sycl::experimental::spec_const_error(
+        throw sycl::ONEAPI::spec_const_error(
             "spec constant is set in a program w/o a binary image",
             PI_INVALID_OPERATION);
       Img = It->second;
diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp
index 71e16724ce3b2..7a0bb02ca103b 100644
--- a/sycl/source/detail/reduction.cpp
+++ b/sycl/source/detail/reduction.cpp
@@ -6,12 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <CL/sycl/intel/reduction.hpp>
+#include <CL/sycl/ONEAPI/reduction.hpp>
 #include <detail/queue_impl.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 namespace detail {
 
 // TODO: The algorithm of choosing the work-group size is definitely
@@ -62,6 +62,6 @@ reduGetMaxWGSize(shared_ptr_class<sycl::detail::queue_impl> Queue,
 }
 
 } // namespace detail
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/source/function_pointer.cpp b/sycl/source/function_pointer.cpp
index c273ae817c8bf..cf67d32afd9f1 100644
--- a/sycl/source/function_pointer.cpp
+++ b/sycl/source/function_pointer.cpp
@@ -6,16 +6,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <CL/sycl/intel/function_pointer.hpp>
 #include <detail/device_impl.hpp>
+#include <CL/sycl/ONEAPI/function_pointer.hpp>
 #include <detail/program_impl.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 namespace detail {
-intel::device_func_ptr_holder_t
+ONEAPI::device_func_ptr_holder_t
 getDeviceFunctionPointerImpl(device &D, program &P, const char *FuncName) {
-  intel::device_func_ptr_holder_t FPtr = 0;
+  ONEAPI::device_func_ptr_holder_t FPtr = 0;
   // FIXME: return value must be checked here, but since we cannot yet check
   // if corresponding extension is supported, let's silently ignore it here.
   const detail::plugin &Plugin = detail::getSyclObjImpl(P)->getPlugin();
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index 0b336464462d3..3d6b8a86b01fc 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -3625,8 +3625,6 @@ _ZN2cl4sycl5eventC1Ev
 _ZN2cl4sycl5eventC2EP9_cl_eventRKNS0_7contextE
 _ZN2cl4sycl5eventC2ESt10shared_ptrINS0_6detail10event_implEE
 _ZN2cl4sycl5eventC2Ev
-_ZN2cl4sycl5intel6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm
-_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm
 _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice
 _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE
 _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE
@@ -3647,6 +3645,8 @@ _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_15device_selectorERKNS0_13property_list
 _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_15device_selectorERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE
 _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE
 _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_6deviceERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE
+_ZN2cl4sycl6ONEAPI6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm
+_ZN2cl4sycl6ONEAPI6detail17reduComputeWGSizeEmmRm
 _ZN2cl4sycl6detail10image_implILi1EE10getDevicesESt10shared_ptrINS1_12context_implEE
 _ZN2cl4sycl6detail10image_implILi1EE10setPitchesEv
 _ZN2cl4sycl6detail10image_implILi1EE11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event
diff --git a/sycl/test/atomic_ref/accessor.cpp b/sycl/test/atomic_ref/accessor.cpp
index 86067e2c74906..6410749cf4877 100644
--- a/sycl/test/atomic_ref/accessor.cpp
+++ b/sycl/test/atomic_ref/accessor.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 // Equivalent to add_test from add.cpp
 // Uses atomic_accessor instead of atomic_ref
@@ -26,12 +26,12 @@ template <typename T> void accessor_test(queue q, size_t N) {
       static_assert(
           std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
                                                 device_scope)),
-                       atomic_accessor<T, 1, intel::memory_order::relaxed,
-                                       intel::memory_scope::device>>::value,
+                       atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                                       ONEAPI::memory_scope::device>>::value,
           "atomic_accessor type incorrectly deduced");
 #endif
-      auto sum = atomic_accessor<T, 1, intel::memory_order::relaxed,
-                                 intel::memory_scope::device>(sum_buf, cgh);
+      auto sum = atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                                 ONEAPI::memory_scope::device>(sum_buf, cgh);
       auto out =
           output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
@@ -39,8 +39,8 @@ template <typename T> void accessor_test(queue q, size_t N) {
         static_assert(
             std::is_same<
                 decltype(sum[0]),
-                atomic_ref<T, intel::memory_order::relaxed,
-                           intel::memory_scope::device,
+                atomic_ref<T, ONEAPI::memory_order::relaxed,
+                           ONEAPI::memory_scope::device,
                            access::address_space::global_space>>::value,
             "atomic_accessor returns incorrect atomic_ref");
         out[gid] = sum[0].fetch_add(T(1));
@@ -70,8 +70,8 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) {
     buffer<T> output_buf(output.data(), output.size());
     q.submit([&](handler &cgh) {
       auto sum =
-          atomic_accessor<T, 1, intel::memory_order::relaxed,
-                          intel::memory_scope::device, access::target::local>(
+          atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                          ONEAPI::memory_scope::device, access::target::local>(
               1, cgh);
       auto out = output_buf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
@@ -80,8 +80,8 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) {
         it.barrier();
         static_assert(
             std::is_same<decltype(sum[0]),
-                         atomic_ref<T, intel::memory_order::relaxed,
-                                    intel::memory_scope::device,
+                         atomic_ref<T, ONEAPI::memory_order::relaxed,
+                                    ONEAPI::memory_scope::device,
                                     access::address_space::local_space>>::value,
             "local atomic_accessor returns incorrect atomic_ref");
         T result = sum[0].fetch_add(T(1));
diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp
index cfe943d176299..565048f1be250 100644
--- a/sycl/test/atomic_ref/add.cpp
+++ b/sycl/test/atomic_ref/add.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T, typename Difference = T>
 void add_fetch_test(queue q, size_t N) {
@@ -23,10 +23,13 @@ void add_fetch_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(sum[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
         out[gid] = atm.fetch_add(Difference(1));
       });
     });
@@ -56,10 +59,13 @@ void add_plus_equal_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(sum[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
         out[gid] = atm += Difference(1);
       });
     });
@@ -89,10 +95,13 @@ void add_pre_inc_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(sum[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
         out[gid] = ++atm;
       });
     });
@@ -122,10 +131,13 @@ void add_post_inc_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(sum[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
         out[gid] = atm++;
       });
     });
@@ -153,13 +165,11 @@ void add_test(queue q, size_t N) {
 }
 
 // Floating-point types do not support pre- or post-increment
-template <>
-void add_test<float>(queue q, size_t N) {
+template <> void add_test<float>(queue q, size_t N) {
   add_fetch_test<float>(q, N);
   add_plus_equal_test<float>(q, N);
 }
-template <>
-void add_test<double>(queue q, size_t N) {
+template <> void add_test<double>(queue q, size_t N) {
   add_fetch_test<double>(q, N);
   add_plus_equal_test<double>(q, N);
 }
diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp
index 11c2caa6ef3c4..db8b12a846498 100644
--- a/sycl/test/atomic_ref/compare_exchange.cpp
+++ b/sycl/test/atomic_ref/compare_exchange.cpp
@@ -9,7 +9,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 class compare_exchange_kernel;
@@ -30,7 +30,9 @@ void compare_exchange_test(queue q, size_t N) {
       cgh.parallel_for<compare_exchange_kernel<T>>(range<1>(N), [=](item<1>
                                                                         it) {
         size_t gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(exc[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(exc[0]);
         T result = T(N); // Avoid copying pointer
         bool success = atm.compare_exchange_strong(result, (T)gid);
         if (success) {
diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp
index b4445928ea075..61e1114b0c1b8 100644
--- a/sycl/test/atomic_ref/exchange.cpp
+++ b/sycl/test/atomic_ref/exchange.cpp
@@ -9,7 +9,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 class exchange_kernel;
@@ -29,7 +29,9 @@ void exchange_test(queue q, size_t N) {
       auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for<exchange_kernel<T>>(range<1>(N), [=](item<1> it) {
         size_t gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(exc[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(exc[0]);
         out[gid] = atm.exchange(T(gid));
       });
     });
diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp
index 30ae13e16e65e..4e044dc7541f6 100644
--- a/sycl/test/atomic_ref/load.cpp
+++ b/sycl/test/atomic_ref/load.cpp
@@ -9,7 +9,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 class load_kernel;
@@ -29,7 +29,9 @@ void load_test(queue q, size_t N) {
       auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for<load_kernel<T>>(range<1>(N), [=](item<1> it) {
         size_t gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(ld[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(ld[0]);
         out[gid] = atm.load();
       });
     });
diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp
index 0c95653b8219b..2bc7067e01650 100644
--- a/sycl/test/atomic_ref/max.cpp
+++ b/sycl/test/atomic_ref/max.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 void max_test(queue q, size_t N) {
@@ -27,7 +27,9 @@ void max_test(queue q, size_t N) {
       auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
 
         // +1 accounts for lowest() returning 0 for unsigned types
         out[gid] = atm.fetch_max(T(gid) + 1);
diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp
index 6a0e32ca14bb5..05e41bb36597e 100644
--- a/sycl/test/atomic_ref/min.cpp
+++ b/sycl/test/atomic_ref/min.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 void min_test(queue q, size_t N) {
@@ -27,7 +27,9 @@ void min_test(queue q, size_t N) {
       auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = atm.fetch_min(T(gid));
       });
     });
diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp
index db076ee994a3d..4c29e8947ea7f 100644
--- a/sycl/test/atomic_ref/store.cpp
+++ b/sycl/test/atomic_ref/store.cpp
@@ -9,7 +9,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 class store_kernel;
@@ -24,7 +24,9 @@ void store_test(queue q, size_t N) {
       auto st = store_buf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<store_kernel<T>>(range<1>(N), [=](item<1> it) {
         size_t gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(st[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(st[0]);
         atm.store(T(gid));
       });
     });
diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp
index 10ed75d21da25..a51ad5a2e9e28 100644
--- a/sycl/test/atomic_ref/sub.cpp
+++ b/sycl/test/atomic_ref/sub.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T, typename Difference = T>
 void sub_fetch_test(queue q, size_t N) {
@@ -23,10 +23,13 @@ void sub_fetch_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = atm.fetch_sub(Difference(1));
       });
     });
@@ -56,10 +59,13 @@ void sub_plus_equal_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = atm -= Difference(1);
       });
     });
@@ -89,10 +95,13 @@ void sub_pre_dec_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = --atm;
       });
     });
@@ -122,10 +131,13 @@ void sub_post_dec_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = atm--;
       });
     });
@@ -153,13 +165,11 @@ void sub_test(queue q, size_t N) {
 }
 
 // Floating-point types do not support pre- or post-decrement
-template <>
-void sub_test<float>(queue q, size_t N) {
+template <> void sub_test<float>(queue q, size_t N) {
   sub_fetch_test<float>(q, N);
   sub_plus_equal_test<float>(q, N);
 }
-template <>
-void sub_test<double>(queue q, size_t N) {
+template <> void sub_test<double>(queue q, size_t N) {
   sub_fetch_test<double>(q, N);
   sub_plus_equal_test<double>(q, N);
 }
diff --git a/sycl/test/built-ins/printf.cpp b/sycl/test/built-ins/printf.cpp
index 6536498587729..768862f40cfdb 100644
--- a/sycl/test/built-ins/printf.cpp
+++ b/sycl/test/built-ins/printf.cpp
@@ -41,7 +41,7 @@ int main() {
     Queue.submit([&](handler &CGH) {
       CGH.single_task<class integral>([=]() {
         // String
-        intel::experimental::printf(format_hello_world);
+        ONEAPI::printf(format_hello_world);
         // Due to a bug in Intel CPU Runtime for OpenCL on Windows, information
         // printed using such format strings (without %-specifiers) might
         // appear in different order if output is redirected to a file or
@@ -50,8 +50,8 @@ int main() {
         // CHECK: {{(Hello, World!)?}}
 
         // Integral types
-        intel::experimental::printf(format_int, (int32_t)123);
-        intel::experimental::printf(format_int, (int32_t)-123);
+        ONEAPI::printf(format_int, (int32_t)123);
+        ONEAPI::printf(format_int, (int32_t)-123);
         // CHECK: 123
         // CHECK-NEXT: -123
 
@@ -60,8 +60,8 @@ int main() {
           // You can declare format string in non-global scope, but in this case
           // static keyword is required
           static const CONSTANT char format[] = "%f\n";
-          intel::experimental::printf(format, 33.4f);
-          intel::experimental::printf(format, -33.4f);
+          ONEAPI::printf(format, 33.4f);
+          ONEAPI::printf(format, -33.4f);
         }
         // CHECK-NEXT: 33.4
         // CHECK-NEXT: -33.4
@@ -73,21 +73,21 @@ int main() {
         using ocl_int4 = cl::sycl::vec<int, 4>::vector_t;
         {
           static const CONSTANT char format[] = "%v4d\n";
-          intel::experimental::printf(format, (ocl_int4)v4);
+          ONEAPI::printf(format, (ocl_int4)v4);
         }
 
         // However, you are still able to print them by-element:
         {
-          intel::experimental::printf(format_vec, (int32_t)v4.w(),
+          ONEAPI::printf(format_vec, (int32_t)v4.w(),
                                       (int32_t)v4.z(), (int32_t)v4.y(),
                                       (int32_t)v4.x());
         }
 #else
         // On host side you always have to print them by-element:
-        intel::experimental::printf(format_vec, (int32_t)v4.x(),
+        ONEAPI::printf(format_vec, (int32_t)v4.x(),
                                     (int32_t)v4.y(), (int32_t)v4.z(),
                                     (int32_t)v4.w());
-        intel::experimental::printf(format_vec, (int32_t)v4.w(),
+        ONEAPI::printf(format_vec, (int32_t)v4.w(),
                                     (int32_t)v4.z(), (int32_t)v4.y(),
                                     (int32_t)v4.x());
 #endif // __SYCL_DEVICE_ONLY__
@@ -100,7 +100,7 @@ int main() {
         // According to OpenCL spec, argument should be a void pointer
         {
           static const CONSTANT char format[] = "%p\n";
-          intel::experimental::printf(format, (void *)Ptr);
+          ONEAPI::printf(format, (void *)Ptr);
         }
         // CHECK-NEXT: {{(0x)?[0-9a-fA-F]+$}}
       });
@@ -111,7 +111,7 @@ int main() {
     Queue.submit([&](handler &CGH) {
       CGH.parallel_for<class stream_string>(range<1>(10), [=](id<1> i) {
         // cast to uint64_t to be sure that we pass 64-bit unsigned value
-        intel::experimental::printf(format_hello_world_2, (uint64_t)i.get(0));
+        ONEAPI::printf(format_hello_world_2, (uint64_t)i.get(0));
       });
     });
     Queue.wait();
diff --git a/sycl/test/function-pointers/fp-as-kernel-arg.cpp b/sycl/test/function-pointers/fp-as-kernel-arg.cpp
index c68a891dcf94c..66da8a4a640cd 100644
--- a/sycl/test/function-pointers/fp-as-kernel-arg.cpp
+++ b/sycl/test/function-pointers/fp-as-kernel-arg.cpp
@@ -31,7 +31,7 @@ int main() {
   P.build_with_kernel_type<class K>();
   cl::sycl::kernel KE = P.get_kernel<class K>();
 
-  auto FptrStorage = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D);
+  auto FptrStorage = cl::sycl::ONEAPI::get_device_func_ptr(&add, "add", P, D);
   if (!D.is_host()) {
     // FIXME: update this check with query to supported extension
     // For now, we don't have runtimes that report required OpenCL extension and
@@ -55,7 +55,7 @@ int main() {
     CGH.parallel_for<class K>(
         KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) {
       auto Fptr =
-          cl::sycl::intel::to_device_func_ptr<decltype(add)>(FptrStorage);
+          cl::sycl::ONEAPI::to_device_func_ptr<decltype(add)>(FptrStorage);
       AccA[Index] = Fptr(AccA[Index], AccB[Index]);
     });
   });
diff --git a/sycl/test/function-pointers/pass-fp-through-buffer.cpp b/sycl/test/function-pointers/pass-fp-through-buffer.cpp
index 67ecea5509a5f..495513d08c87c 100644
--- a/sycl/test/function-pointers/pass-fp-through-buffer.cpp
+++ b/sycl/test/function-pointers/pass-fp-through-buffer.cpp
@@ -34,12 +34,12 @@ int main() {
   P.build_with_kernel_type<class K>();
   cl::sycl::kernel KE = P.get_kernel<class K>();
 
-  cl::sycl::buffer<cl::sycl::intel::device_func_ptr_holder_t> DispatchTable(2);
+  cl::sycl::buffer<cl::sycl::ONEAPI::device_func_ptr_holder_t> DispatchTable(2);
   {
     auto DTAcc =
         DispatchTable.get_access<cl::sycl::access::mode::discard_write>();
-    DTAcc[0] = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D);
-    DTAcc[1] = cl::sycl::intel::get_device_func_ptr(&sub, "sub", P, D);
+    DTAcc[0] = cl::sycl::ONEAPI::get_device_func_ptr(&add, "add", P, D);
+    DTAcc[1] = cl::sycl::ONEAPI::get_device_func_ptr(&sub, "sub", P, D);
     if (!D.is_host()) {
       // FIXME: update this check with query to supported extension
       // For now, we don't have runtimes that report required OpenCL extension
@@ -70,7 +70,7 @@ int main() {
       CGH.parallel_for<class K>(
           KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) {
         auto FP =
-            cl::sycl::intel::to_device_func_ptr<int(int, int)>(AccDT[Mode]);
+            cl::sycl::ONEAPI::to_device_func_ptr<int(int, int)>(AccDT[Mode]);
 
         AccA[Index] = FP(AccA[Index], AccB[Index]);
       });
diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp
index 2a175d000bb6f..cc19772af0c12 100644
--- a/sycl/test/group-algorithm/all_of.cpp
+++ b/sycl/test/group-algorithm/all_of.cpp
@@ -12,7 +12,7 @@
 #include <cassert>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class Predicate>
 class all_of_kernel;
diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp
index 6ce61afaffdec..45ddc41f6355f 100644
--- a/sycl/test/group-algorithm/any_of.cpp
+++ b/sycl/test/group-algorithm/any_of.cpp
@@ -12,7 +12,7 @@
 #include <cassert>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class Predicate>
 class any_of_kernel;
diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp
index df0887a40d4a0..6ba7c3199f8e0 100644
--- a/sycl/test/group-algorithm/broadcast.cpp
+++ b/sycl/test/group-algorithm/broadcast.cpp
@@ -12,7 +12,7 @@
 #include <cassert>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 class broadcast_kernel;
 
diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp
index 47dc1f6122720..5aaf0373eea57 100644
--- a/sycl/test/group-algorithm/exclusive_scan.cpp
+++ b/sycl/test/group-algorithm/exclusive_scan.cpp
@@ -14,7 +14,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class BinaryOperation, int TestNumber>
 class exclusive_scan_kernel;
diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp
index 54311a162ed9e..75cf06c7c371e 100644
--- a/sycl/test/group-algorithm/inclusive_scan.cpp
+++ b/sycl/test/group-algorithm/inclusive_scan.cpp
@@ -14,7 +14,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class BinaryOperation, int TestNumber>
 class inclusive_scan_kernel;
diff --git a/sycl/test/group-algorithm/leader.cpp b/sycl/test/group-algorithm/leader.cpp
index ff02cf7e77f9e..afe5e28648a6e 100644
--- a/sycl/test/group-algorithm/leader.cpp
+++ b/sycl/test/group-algorithm/leader.cpp
@@ -10,7 +10,7 @@
 #include <CL/sycl.hpp>
 #include <cassert>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 class leader_kernel;
 
diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp
index c8b56158d20b7..7a08b6463a682 100644
--- a/sycl/test/group-algorithm/none_of.cpp
+++ b/sycl/test/group-algorithm/none_of.cpp
@@ -12,7 +12,7 @@
 #include <cassert>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class Predicate>
 class none_of_kernel;
diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp
index 64ed0bd82fcc2..ef65439404884 100644
--- a/sycl/test/group-algorithm/reduce.cpp
+++ b/sycl/test/group-algorithm/reduce.cpp
@@ -13,7 +13,7 @@
 #include <limits>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class BinaryOperation>
 class reduce_kernel;
diff --git a/sycl/test/linear_id/linear-sub_group.cpp b/sycl/test/linear_id/linear-sub_group.cpp
index 2b3f75ae2182e..67d039700fb8b 100644
--- a/sycl/test/linear_id/linear-sub_group.cpp
+++ b/sycl/test/linear_id/linear-sub_group.cpp
@@ -38,7 +38,7 @@ int main(int argc, char *argv[]) {
           nd_range<2>(range<2>(outer, inner), range<2>(outer, inner)),
           [=](nd_item<2> it) {
             id<2> idx = it.get_global_id();
-            intel::sub_group sg = it.get_sub_group();
+            ONEAPI::sub_group sg = it.get_sub_group();
             output[idx] = sg.get_group_id()[0] * sg.get_local_range()[0] +
                           sg.get_local_id()[0];
           });
diff --git a/sycl/test/reduction/reduction_ctor.cpp b/sycl/test/reduction/reduction_ctor.cpp
index cbf1b5907cfc2..052437130c8b3 100644
--- a/sycl/test/reduction/reduction_ctor.cpp
+++ b/sycl/test/reduction/reduction_ctor.cpp
@@ -49,7 +49,7 @@ void testKnown(T Identity, BinaryOperation BOp, T A, T B) {
     // This accessor is not really used in this test.
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         ReduAcc(ReduBuf, CGH);
-    auto Redu = intel::reduction(ReduAcc, BOp);
+    auto Redu = ONEAPI::reduction(ReduAcc, BOp);
     assert(Redu.getIdentity() == Identity &&
            "Failed getIdentity() check().");
     test_reducer(Redu, A, B);
@@ -69,7 +69,7 @@ void testUnknown(T Identity, BinaryOperation BOp, T A, T B) {
     // This accessor is not really used in this test.
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         ReduAcc(ReduBuf, CGH);
-    auto Redu = intel::reduction(ReduAcc, Identity, BOp);
+    auto Redu = ONEAPI::reduction(ReduAcc, Identity, BOp);
     assert(Redu.getIdentity() == Identity &&
            "Failed getIdentity() check().");
     test_reducer(Redu, Identity, BOp, A, B);
@@ -88,18 +88,18 @@ void testBoth(T Identity, BinaryOperation BOp, T A, T B) {
 }
 
 int main() {
-  testBoth<int>(0, intel::plus<int>(), 1, 7);
+  testBoth<int>(0, ONEAPI::plus<int>(), 1, 7);
   testBoth<int>(1, std::multiplies<int>(), 1, 7);
-  testBoth<int>(0, intel::bit_or<int>(), 1, 8);
-  testBoth<int>(0, intel::bit_xor<int>(), 7, 3);
-  testBoth<int>(~0, intel::bit_and<int>(), 7, 3);
-  testBoth<int>((std::numeric_limits<int>::max)(), intel::minimum<int>(), 7, 3);
-  testBoth<int>((std::numeric_limits<int>::min)(), intel::maximum<int>(), 7, 3);
+  testBoth<int>(0, ONEAPI::bit_or<int>(), 1, 8);
+  testBoth<int>(0, ONEAPI::bit_xor<int>(), 7, 3);
+  testBoth<int>(~0, ONEAPI::bit_and<int>(), 7, 3);
+  testBoth<int>((std::numeric_limits<int>::max)(), ONEAPI::minimum<int>(), 7, 3);
+  testBoth<int>((std::numeric_limits<int>::min)(), ONEAPI::maximum<int>(), 7, 3);
 
-  testBoth<float>(0, intel::plus<float>(), 1, 7);
+  testBoth<float>(0, ONEAPI::plus<float>(), 1, 7);
   testBoth<float>(1, std::multiplies<float>(), 1, 7);
-  testBoth<float>(getMaximumFPValue<float>(), intel::minimum<float>(), 7, 3);
-  testBoth<float>(getMinimumFPValue<float>(), intel::maximum<float>(), 7, 3);
+  testBoth<float>(getMaximumFPValue<float>(), ONEAPI::minimum<float>(), 7, 3);
+  testBoth<float>(getMinimumFPValue<float>(), ONEAPI::maximum<float>(), 7, 3);
 
   testUnknown<CustomVec<float>, 0,
               Unknown<CustomVec<float>, 0, CustomVecPlus<float>>>(
diff --git a/sycl/test/reduction/reduction_nd_conditional.cpp b/sycl/test/reduction/reduction_nd_conditional.cpp
index c700097993079..985f77cbc77f4 100644
--- a/sycl/test/reduction/reduction_nd_conditional.cpp
+++ b/sycl/test/reduction/reduction_nd_conditional.cpp
@@ -85,7 +85,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -114,10 +114,10 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 }
 
 int main() {
-  test<int, 0, intel::plus<int>>(0, 2, 2);
-  test<int, 1, intel::plus<int>>(0, 7, 7);
-  test<int, 0, intel::plus<int>>(0, 2, 64);
-  test<short, 1, intel::plus<short>>(0, 16, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 1, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 64);
+  test<short, 1, ONEAPI::plus<short>>(0, 16, 256);
 
   std::cout << "Test passed\n";
   return 0;
diff --git a/sycl/test/reduction/reduction_nd_ext_type.hpp b/sycl/test/reduction/reduction_nd_ext_type.hpp
index a80aefc09cd45..4cb182e82c7ab 100644
--- a/sycl/test/reduction/reduction_nd_ext_type.hpp
+++ b/sycl/test/reduction/reduction_nd_ext_type.hpp
@@ -30,7 +30,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, Mode, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -64,15 +64,15 @@ int runTests(const string_class &ExtensionName) {
 
   // Check some less standards WG sizes and corner cases first.
   test<T, 1, access::mode::read_write, std::multiplies<T>>(0, 4, 4);
-  test<T, 0, access::mode::discard_write, intel::plus<T>>(0, 4, 64);
+  test<T, 0, access::mode::discard_write, ONEAPI::plus<T>>(0, 4, 64);
 
-  test<T, 0, access::mode::read_write, intel::minimum<T>>(getMaximumFPValue<T>(), 7, 7);
-  test<T, 1, access::mode::discard_write, intel::maximum<T>>(getMinimumFPValue<T>(), 7, 7 * 5);
+  test<T, 0, access::mode::read_write, ONEAPI::minimum<T>>(getMaximumFPValue<T>(), 7, 7);
+  test<T, 1, access::mode::discard_write, ONEAPI::maximum<T>>(getMinimumFPValue<T>(), 7, 7 * 5);
 
 #if __cplusplus >= 201402L
-  test<T, 1, access::mode::read_write, intel::plus<>>(1, 3, 3 * 5);
-  test<T, 1, access::mode::discard_write, intel::minimum<>>(getMaximumFPValue<T>(), 3, 3);
-  test<T, 0, access::mode::discard_write, intel::maximum<>>(getMinimumFPValue<T>(), 3, 3);
+  test<T, 1, access::mode::read_write, ONEAPI::plus<>>(1, 3, 3 * 5);
+  test<T, 1, access::mode::discard_write, ONEAPI::minimum<>>(getMaximumFPValue<T>(), 3, 3);
+  test<T, 0, access::mode::discard_write, ONEAPI::maximum<>>(getMinimumFPValue<T>(), 3, 3);
 #endif // __cplusplus >= 201402L
 
   std::cout << "Test passed\n";
diff --git a/sycl/test/reduction/reduction_nd_lambda.cpp b/sycl/test/reduction/reduction_nd_lambda.cpp
index 3d5cf21658995..37c435b9b0f07 100644
--- a/sycl/test/reduction/reduction_nd_lambda.cpp
+++ b/sycl/test/reduction/reduction_nd_lambda.cpp
@@ -1,5 +1,5 @@
 // UNSUPPORTED: cuda
-// Reductions use work-group builtins (e.g. intel::reduce()) not yet supported
+// Reductions use work-group builtins (e.g. ONEAPI::reduce()) not yet supported
 // by CUDA.
 //
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
@@ -30,7 +30,7 @@ void test(T Identity, BinaryOperation BOp, size_t WGSize, size_t NWItems) {
   Q.submit([&](handler &CGH) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     auto Out = OutBuf.template get_access<access::mode::discard_write>(CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
diff --git a/sycl/test/reduction/reduction_nd_s0_dw.cpp b/sycl/test/reduction/reduction_nd_s0_dw.cpp
index 834ccf4407649..8b900a3a1fd9a 100644
--- a/sycl/test/reduction/reduction_nd_s0_dw.cpp
+++ b/sycl/test/reduction/reduction_nd_s0_dw.cpp
@@ -35,7 +35,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -59,33 +59,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // Check some less standards WG sizes and corner cases first.
-  test<int, 0, intel::plus<int>>(0, 2, 2);
-  test<int, 0, intel::plus<int>>(0, 7, 7);
-  test<int, 0, intel::plus<int>>(0, 9, 18);
-  test<int, 0, intel::plus<int>>(0, 49, 49 * 5);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 0, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 0, ONEAPI::plus<int>>(0, 9, 18);
+  test<int, 0, ONEAPI::plus<int>>(0, 49, 49 * 5);
 
   // Try some power-of-two work-group sizes.
-  test<int, 0, intel::plus<int>>(0, 2, 64);
-  test<int, 0, intel::plus<int>>(0, 4, 64);
-  test<int, 0, intel::plus<int>>(0, 8, 128);
-  test<int, 0, intel::plus<int>>(0, 16, 256);
-  test<int, 0, intel::plus<int>>(0, 32, 256);
-  test<int, 0, intel::plus<int>>(0, 64, 256);
-  test<int, 0, intel::plus<int>>(0, 128, 256);
-  test<int, 0, intel::plus<int>>(0, 256, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 64);
+  test<int, 0, ONEAPI::plus<int>>(0, 4, 64);
+  test<int, 0, ONEAPI::plus<int>>(0, 8, 128);
+  test<int, 0, ONEAPI::plus<int>>(0, 16, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 32, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 64, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 128, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 256, 256);
 
   // Check with various operations.
   test<int, 0, std::multiplies<int>>(1, 8, 256);
-  test<int, 0, intel::bit_or<int>>(0, 8, 256);
-  test<int, 0, intel::bit_xor<int>>(0, 8, 256);
-  test<int, 0, intel::bit_and<int>>(~0, 8, 256);
-  test<int, 0, intel::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
-  test<int, 0, intel::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
+  test<int, 0, ONEAPI::bit_or<int>>(0, 8, 256);
+  test<int, 0, ONEAPI::bit_xor<int>>(0, 8, 256);
+  test<int, 0, ONEAPI::bit_and<int>>(~0, 8, 256);
+  test<int, 0, ONEAPI::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
+  test<int, 0, ONEAPI::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
 
   // Check with various types.
   test<float, 0, std::multiplies<float>>(1, 8, 256);
-  test<float, 0, intel::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
-  test<float, 0, intel::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
+  test<float, 0, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
+  test<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
 
   // Check with CUSTOM type.
   test<CustomVec<long long>, 0, CustomVecPlus<long long>>(CustomVec<long long>(0), 8, 256);
diff --git a/sycl/test/reduction/reduction_nd_s0_rw.cpp b/sycl/test/reduction/reduction_nd_s0_rw.cpp
index 2040b632e07fb..029458942390a 100644
--- a/sycl/test/reduction/reduction_nd_s0_rw.cpp
+++ b/sycl/test/reduction/reduction_nd_s0_rw.cpp
@@ -37,7 +37,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::read_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -61,33 +61,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // Check some less standards WG sizes and corner cases first.
-  test<int, 0, intel::plus<int>>(0, 2, 2);
-  test<int, 0, intel::plus<int>>(0, 7, 7);
-  test<int, 0, intel::plus<int>>(0, 9, 18);
-  test<int, 0, intel::plus<int>>(0, 49, 49 * 5);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 0, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 0, ONEAPI::plus<int>>(0, 9, 18);
+  test<int, 0, ONEAPI::plus<int>>(0, 49, 49 * 5);
 
   // Try some power-of-two work-group sizes.
-  test<int, 0, intel::plus<int>>(0, 2, 64);
-  test<int, 0, intel::plus<int>>(0, 4, 64);
-  test<int, 0, intel::plus<int>>(0, 8, 128);
-  test<int, 0, intel::plus<int>>(0, 16, 256);
-  test<int, 0, intel::plus<int>>(0, 32, 256);
-  test<int, 0, intel::plus<int>>(0, 64, 256);
-  test<int, 0, intel::plus<int>>(0, 128, 256);
-  test<int, 0, intel::plus<int>>(0, 256, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 64);
+  test<int, 0, ONEAPI::plus<int>>(0, 4, 64);
+  test<int, 0, ONEAPI::plus<int>>(0, 8, 128);
+  test<int, 0, ONEAPI::plus<int>>(0, 16, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 32, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 64, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 128, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 256, 256);
 
   // Check with various operations.
   test<int, 0, std::multiplies<int>>(1, 8, 256);
-  test<int, 0, intel::bit_or<int>>(0, 8, 256);
-  test<int, 0, intel::bit_xor<int>>(0, 8, 256);
-  test<int, 0, intel::bit_and<int>>(~0, 8, 256);
-  test<int, 0, intel::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
-  test<int, 0, intel::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
+  test<int, 0, ONEAPI::bit_or<int>>(0, 8, 256);
+  test<int, 0, ONEAPI::bit_xor<int>>(0, 8, 256);
+  test<int, 0, ONEAPI::bit_and<int>>(~0, 8, 256);
+  test<int, 0, ONEAPI::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
+  test<int, 0, ONEAPI::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
 
   // Check with various types.
   test<float, 0, std::multiplies<float>>(1, 8, 256);
-  test<float, 0, intel::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
-  test<float, 0, intel::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
+  test<float, 0, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
+  test<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
 
   // Check with CUSTOM type.
   test<CustomVec<long long>, 0, CustomVecPlus<long long>>(CustomVec<long long>(0), 8, 256);
diff --git a/sycl/test/reduction/reduction_nd_s1_dw.cpp b/sycl/test/reduction/reduction_nd_s1_dw.cpp
index 9fe36d69daa8c..7cc975e261dc2 100644
--- a/sycl/test/reduction/reduction_nd_s1_dw.cpp
+++ b/sycl/test/reduction/reduction_nd_s1_dw.cpp
@@ -36,7 +36,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -60,33 +60,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // Check some less standards WG sizes and corner cases first.
-  test<int, 1, intel::plus<int>>(0, 2, 2);
-  test<int, 1, intel::plus<int>>(0, 7, 7);
-  test<int, 1, intel::plus<int>>(0, 9, 18);
-  test<int, 1, intel::plus<int>>(0, 49, 49 * 5);
+  test<int, 1, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 1, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 1, ONEAPI::plus<int>>(0, 9, 18);
+  test<int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
 
   // Try some power-of-two work-group sizes.
-  test<int, 1, intel::plus<int>>(0, 2, 64);
-  test<int, 1, intel::plus<int>>(0, 4, 64);
-  test<int, 1, intel::plus<int>>(0, 8, 128);
-  test<int, 1, intel::plus<int>>(0, 16, 256);
-  test<int, 1, intel::plus<int>>(0, 32, 256);
-  test<int, 1, intel::plus<int>>(0, 64, 256);
-  test<int, 1, intel::plus<int>>(0, 128, 256);
-  test<int, 1, intel::plus<int>>(0, 256, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 2, 64);
+  test<int, 1, ONEAPI::plus<int>>(0, 4, 64);
+  test<int, 1, ONEAPI::plus<int>>(0, 8, 128);
+  test<int, 1, ONEAPI::plus<int>>(0, 16, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 32, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 64, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 128, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 256, 256);
 
   // Check with various operations.
   test<int, 1, std::multiplies<int>>(1, 8, 256);
-  test<int, 1, intel::bit_or<int>>(0, 8, 256);
-  test<int, 1, intel::bit_xor<int>>(0, 8, 256);
-  test<int, 1, intel::bit_and<int>>(~0, 8, 256);
-  test<int, 1, intel::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
-  test<int, 1, intel::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
+  test<int, 1, ONEAPI::bit_or<int>>(0, 8, 256);
+  test<int, 1, ONEAPI::bit_xor<int>>(0, 8, 256);
+  test<int, 1, ONEAPI::bit_and<int>>(~0, 8, 256);
+  test<int, 1, ONEAPI::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
+  test<int, 1, ONEAPI::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
 
   // Check with various types.
   test<float, 1, std::multiplies<float>>(1, 8, 256);
-  test<float, 1, intel::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
-  test<float, 1, intel::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
+  test<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
+  test<float, 1, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
 
   // Check with CUSTOM type.
   test<CustomVec<long long>, 1, CustomVecPlus<long long>>(CustomVec<long long>(0), 8, 256);
diff --git a/sycl/test/reduction/reduction_nd_s1_rw.cpp b/sycl/test/reduction/reduction_nd_s1_rw.cpp
index d283fbe3cebe0..2c8f6a8343e83 100644
--- a/sycl/test/reduction/reduction_nd_s1_rw.cpp
+++ b/sycl/test/reduction/reduction_nd_s1_rw.cpp
@@ -38,7 +38,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::read_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -62,33 +62,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // Check some less standards WG sizes and corner cases first.
-  test<int, 1, intel::plus<int>>(0, 2, 2);
-  test<int, 1, intel::plus<int>>(0, 7, 7);
-  test<int, 1, intel::plus<int>>(0, 9, 18);
-  test<int, 1, intel::plus<int>>(0, 49, 49 * 5);
+  test<int, 1, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 1, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 1, ONEAPI::plus<int>>(0, 9, 18);
+  test<int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
 
   // Try some power-of-two work-group sizes.
-  test<int, 1, intel::plus<int>>(0, 2, 64);
-  test<int, 1, intel::plus<int>>(0, 4, 64);
-  test<int, 1, intel::plus<int>>(0, 8, 128);
-  test<int, 1, intel::plus<int>>(0, 16, 256);
-  test<int, 1, intel::plus<int>>(0, 32, 256);
-  test<int, 1, intel::plus<int>>(0, 64, 256);
-  test<int, 1, intel::plus<int>>(0, 128, 256);
-  test<int, 1, intel::plus<int>>(0, 256, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 2, 64);
+  test<int, 1, ONEAPI::plus<int>>(0, 4, 64);
+  test<int, 1, ONEAPI::plus<int>>(0, 8, 128);
+  test<int, 1, ONEAPI::plus<int>>(0, 16, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 32, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 64, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 128, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 256, 256);
 
   // Check with various operations.
   test<int, 1, std::multiplies<int>>(1, 8, 256);
-  test<int, 1, intel::bit_or<int>>(0, 8, 256);
-  test<int, 1, intel::bit_xor<int>>(0, 8, 256);
-  test<int, 1, intel::bit_and<int>>(~0, 8, 256);
-  test<int, 1, intel::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
-  test<int, 1, intel::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
+  test<int, 1, ONEAPI::bit_or<int>>(0, 8, 256);
+  test<int, 1, ONEAPI::bit_xor<int>>(0, 8, 256);
+  test<int, 1, ONEAPI::bit_and<int>>(~0, 8, 256);
+  test<int, 1, ONEAPI::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
+  test<int, 1, ONEAPI::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
 
   // Check with various types.
   test<float, 1, std::multiplies<float>>(1, 8, 256);
-  test<float, 1, intel::minimum<float>>(getMaximumFPValue<float>(), 1, 16);
-  test<float, 1, intel::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
+  test<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 1, 16);
+  test<float, 1, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
 
   // Check with CUSTOM type.
   test<CustomVec<long long>, 1, CustomVecPlus<long long>>(CustomVec<long long>(0), 8, 256);
diff --git a/sycl/test/reduction/reduction_placeholder.cpp b/sycl/test/reduction/reduction_placeholder.cpp
index e972105bbab50..03278e44939ff 100644
--- a/sycl/test/reduction/reduction_placeholder.cpp
+++ b/sycl/test/reduction/reduction_placeholder.cpp
@@ -7,7 +7,7 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // with reductions initialized with a placeholder accessor.
@@ -41,7 +41,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
   Q.submit([&](handler &CGH) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     CGH.require(Out);
-    auto Redu = intel::reduction(Out, Identity, BinaryOperation());
+    auto Redu = ONEAPI::reduction(Out, Identity, BinaryOperation());
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
@@ -64,16 +64,16 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // fast atomics and fast reduce
-  test<int, 1, intel::plus<int>>(0, 49, 49 * 5);
-  test<int, 0, intel::plus<int>>(0, 8, 8);
+  test<int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
+  test<int, 0, ONEAPI::plus<int>>(0, 8, 8);
 
   // fast atomics
-  test<int, 0, intel::bit_or<int>>(0, 7, 7 * 3);
-  test<int, 1, intel::bit_or<int>>(0, 4, 128);
+  test<int, 0, ONEAPI::bit_or<int>>(0, 7, 7 * 3);
+  test<int, 1, ONEAPI::bit_or<int>>(0, 4, 128);
 
   // fast reduce
-  test<float, 1, intel::minimum<float>>(getMaximumFPValue<float>(), 5, 5 * 7);
-  test<float, 0, intel::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
+  test<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 5, 5 * 7);
+  test<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
 
   // generic algorithm
   test<int, 0, std::multiplies<int>>(1, 7, 7 * 5);
diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp
index fd527f8f4e0ef..64eecca05663b 100644
--- a/sycl/test/reduction/reduction_transparent.cpp
+++ b/sycl/test/reduction/reduction_transparent.cpp
@@ -7,7 +7,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // where func is a transparent functor.
@@ -46,7 +46,7 @@ void testId(T Identity, size_t WGSize, size_t NWItems) {
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
     CGH.parallel_for<SomeIdClass<T, Dim, BinaryOperation>>(
-        NDRange, intel::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) {
+        NDRange, ONEAPI::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) {
           Sum.combine(In[NDIt.get_global_linear_id()]);
         });
   });
@@ -86,7 +86,7 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) {
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
     CGH.parallel_for<SomeNoIdClass<T, Dim, BinaryOperation>>(
-        NDRange, intel::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) {
+        NDRange, ONEAPI::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) {
           Sum.combine(In[NDIt.get_global_linear_id()]);
         });
   });
@@ -110,10 +110,10 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
 #if __cplusplus >= 201402L
-  test<float, 0, intel::maximum<>>(getMinimumFPValue<float>(), 7, 7 * 5);
-  test<signed char, 0, intel::plus<>>(0, 7, 49);
+  test<float, 0, ONEAPI::maximum<>>(getMinimumFPValue<float>(), 7, 7 * 5);
+  test<signed char, 0, ONEAPI::plus<>>(0, 7, 49);
   test<unsigned char, 1, std::multiplies<>>(1, 4, 16);
-  test<unsigned short, 0, intel::plus<>>(0, 1, 512 + 32);
+  test<unsigned short, 0, ONEAPI::plus<>>(0, 1, 512 + 32);
 #endif // __cplusplus >= 201402L
 
   std::cout << "Test passed\n";
diff --git a/sycl/test/reduction/reduction_usm.cpp b/sycl/test/reduction/reduction_usm.cpp
index 592a36904a8e8..54203771cc73e 100644
--- a/sycl/test/reduction/reduction_usm.cpp
+++ b/sycl/test/reduction/reduction_usm.cpp
@@ -7,7 +7,7 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // with reductions initialized with USM var.
@@ -62,7 +62,7 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) {
   // Compute.
   Q.submit([&](handler &CGH) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
-    auto Redu = intel::reduction(ReduVarPtr, Identity, BOp);
+    auto Redu = ONEAPI::reduction(ReduVarPtr, Identity, BOp);
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
@@ -105,16 +105,16 @@ void testUSM(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // fast atomics and fast reduce
-  testUSM<int, 1, intel::plus<int>>(0, 49, 49 * 5);
-  testUSM<int, 0, intel::plus<int>>(0, 8, 128);
+  testUSM<int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
+  testUSM<int, 0, ONEAPI::plus<int>>(0, 8, 128);
 
   // fast atomics
-  testUSM<int, 0, intel::bit_or<int>>(0, 7, 7 * 3);
-  testUSM<int, 1, intel::bit_or<int>>(0, 4, 128);
+  testUSM<int, 0, ONEAPI::bit_or<int>>(0, 7, 7 * 3);
+  testUSM<int, 1, ONEAPI::bit_or<int>>(0, 4, 128);
 
   // fast reduce
-  testUSM<float, 1, intel::minimum<float>>(getMaximumFPValue<float>(), 5, 5 * 7);
-  testUSM<float, 0, intel::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
+  testUSM<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 5, 5 * 7);
+  testUSM<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
 
   // generic algorithm
   testUSM<int, 0, std::multiplies<int>>(1, 7, 7 * 5);
diff --git a/sycl/test/regression/sub-group-store-const-ref.cpp b/sycl/test/regression/sub-group-store-const-ref.cpp
index dd10e1d57f12f..5c79e5e6758f6 100644
--- a/sycl/test/regression/sub-group-store-const-ref.cpp
+++ b/sycl/test/regression/sub-group-store-const-ref.cpp
@@ -13,4 +13,4 @@
 #include <CL/sycl.hpp>
 using namespace sycl;
 
-void test(intel::sub_group sg, global_ptr<int> ptr) { sg.store(ptr, 1); }
+void test(ONEAPI::sub_group sg, global_ptr<int> ptr) { sg.store(ptr, 1); }
diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp
index 442121353bb73..eb5bdf725c63d 100644
--- a/sycl/test/spec_const/spec_const_hw.cpp
+++ b/sycl/test/spec_const/spec_const_hw.cpp
@@ -35,7 +35,7 @@ int val = 10;
 int get_value() { return val; }
 
 float foo(
-    const cl::sycl::experimental::spec_constant<float, MyFloatConst> &f32) {
+    const cl::sycl::ONEAPI::spec_constant<float, MyFloatConst> &f32) {
   return f32;
 }
 
@@ -66,10 +66,10 @@ int main(int argc, char **argv) {
   // TODO make this floating point once supported by the compiler
   float goldf = (float)get_value();
 
-  cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::spec_constant<int32_t, MyInt32Const> i32 =
       program1.set_spec_constant<MyInt32Const>(goldi);
 
-  cl::sycl::experimental::spec_constant<float, MyFloatConst> f32 =
+  cl::sycl::ONEAPI::spec_constant<float, MyFloatConst> f32 =
       program2.set_spec_constant<MyFloatConst>(goldf);
 
   program1.build_with_kernel_type<KernelAAAi>();
diff --git a/sycl/test/spec_const/spec_const_neg.cpp b/sycl/test/spec_const/spec_const_neg.cpp
index 7312e29ab40e1..a195b9ad59bc9 100644
--- a/sycl/test/spec_const/spec_const_neg.cpp
+++ b/sycl/test/spec_const/spec_const_neg.cpp
@@ -45,7 +45,7 @@ int main(int argc, char **argv) {
             << "\n";
   cl::sycl::program program1(q.get_context());
 
-  cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::spec_constant<int32_t, MyInt32Const> i32 =
       program1.set_spec_constant<MyInt32Const>(10);
 
   std::vector<int> veci(1);
@@ -56,7 +56,7 @@ int main(int argc, char **argv) {
   try {
     // This is an attempt to set a spec constant after the program has been
     // built - spec_const_error should be thrown
-    cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+    cl::sycl::ONEAPI::spec_constant<int32_t, MyInt32Const> i32 =
         program1.set_spec_constant<MyInt32Const>(10);
 
     cl::sycl::buffer<int, 1> bufi(veci.data(), veci.size());
@@ -69,7 +69,7 @@ int main(int argc, char **argv) {
             acci[0] = i32.get();
           });
     });
-  } catch (cl::sycl::experimental::spec_const_error &sc_err) {
+  } catch (cl::sycl::ONEAPI::spec_const_error &sc_err) {
     passed = true;
   } catch (cl::sycl::exception &e) {
     std::cout << "*** Exception caught: " << e.what() << "\n";
diff --git a/sycl/test/spec_const/spec_const_redefine.cpp b/sycl/test/spec_const/spec_const_redefine.cpp
index 6883ce5c9d7d6..da589dba51db2 100644
--- a/sycl/test/spec_const/spec_const_redefine.cpp
+++ b/sycl/test/spec_const/spec_const_redefine.cpp
@@ -68,9 +68,9 @@ int main(int argc, char **argv) {
   for (int i = 0; i < n_sc_sets; i++) {
     cl::sycl::program program(q.get_context());
     const int *sc_set = &sc_vals[i][0];
-    cl::sycl::experimental::spec_constant<int32_t, SC0> sc0 =
+    cl::sycl::ONEAPI::spec_constant<int32_t, SC0> sc0 =
         program.set_spec_constant<SC0>(sc_set[0]);
-    cl::sycl::experimental::spec_constant<int32_t, SC1> sc1 =
+    cl::sycl::ONEAPI::spec_constant<int32_t, SC1> sc1 =
         program.set_spec_constant<SC1>(sc_set[1]);
 
     program.build_with_kernel_type<KernelAAA>();
diff --git a/sycl/test/spec_const/spec_const_types.cpp b/sycl/test/spec_const/spec_const_types.cpp
index c7017b2b69726..29d13f8a4c23e 100644
--- a/sycl/test/spec_const/spec_const_types.cpp
+++ b/sycl/test/spec_const/spec_const_types.cpp
@@ -42,49 +42,49 @@ int main() {
   cl::sycl::program program(queue.get_context());
 
   // Create specialization constants.
-  cl::sycl::experimental::spec_constant<bool, MyBoolConst> i1 =
+  cl::sycl::ONEAPI::spec_constant<bool, MyBoolConst> i1 =
       program.set_spec_constant<MyBoolConst>((bool)get_value());
   // CHECK-DAG: _ZTS11MyBoolConst=1|0
 
-  cl::sycl::experimental::spec_constant<int8_t, MyInt8Const> i8 =
+  cl::sycl::ONEAPI::spec_constant<int8_t, MyInt8Const> i8 =
       program.set_spec_constant<MyInt8Const>((int8_t)get_value());
   // CHECK-DAG: _ZTS11MyInt8Const=1|1
-  cl::sycl::experimental::spec_constant<uint8_t, MyUInt8Const> ui8 =
+  cl::sycl::ONEAPI::spec_constant<uint8_t, MyUInt8Const> ui8 =
       program.set_spec_constant<MyUInt8Const>((uint8_t)get_value());
   // CHECK-DAG: _ZTS12MyUInt8Const=1|2
 
-  cl::sycl::experimental::spec_constant<int16_t, MyInt16Const> i16 =
+  cl::sycl::ONEAPI::spec_constant<int16_t, MyInt16Const> i16 =
       program.set_spec_constant<MyInt16Const>((int16_t)get_value());
   // CHECK-DAG: _ZTS12MyInt16Const=1|3
-  cl::sycl::experimental::spec_constant<uint16_t, MyUInt16Const> ui16 =
+  cl::sycl::ONEAPI::spec_constant<uint16_t, MyUInt16Const> ui16 =
       program.set_spec_constant<MyUInt16Const>((uint16_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt16Const=1|4
 
-  cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::spec_constant<int32_t, MyInt32Const> i32 =
       program.set_spec_constant<MyInt32Const>((int32_t)get_value());
   // CHECK-DAG: _ZTS12MyInt32Const=1|5
-  cl::sycl::experimental::spec_constant<uint32_t, MyUInt32Const> ui32 =
+  cl::sycl::ONEAPI::spec_constant<uint32_t, MyUInt32Const> ui32 =
       program.set_spec_constant<MyUInt32Const>((uint32_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt32Const=1|6
 
-  cl::sycl::experimental::spec_constant<int64_t, MyInt64Const> i64 =
+  cl::sycl::ONEAPI::spec_constant<int64_t, MyInt64Const> i64 =
       program.set_spec_constant<MyInt64Const>((int64_t)get_value());
   // CHECK-DAG: _ZTS12MyInt64Const=1|7
-  cl::sycl::experimental::spec_constant<uint64_t, MyUInt64Const> ui64 =
+  cl::sycl::ONEAPI::spec_constant<uint64_t, MyUInt64Const> ui64 =
       program.set_spec_constant<MyUInt64Const>((uint64_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt64Const=1|8
 
 #define HALF 0 // TODO not yet supported
 #if HALF
-  cl::sycl::experimental::spec_constant<cl::sycl::half, MyHalfConst> f16 =
+  cl::sycl::ONEAPI::spec_constant<cl::sycl::half, MyHalfConst> f16 =
       program.set_spec_constant<MyHalfConst>((cl::sycl::half)get_value());
 #endif
 
-  cl::sycl::experimental::spec_constant<float, MyFloatConst> f32 =
+  cl::sycl::ONEAPI::spec_constant<float, MyFloatConst> f32 =
       program.set_spec_constant<MyFloatConst>((float)get_value());
   // CHECK-DAG: _ZTS12MyFloatConst=1|9
 
-  cl::sycl::experimental::spec_constant<double, MyDoubleConst> f64 =
+  cl::sycl::ONEAPI::spec_constant<double, MyDoubleConst> f64 =
       program.set_spec_constant<MyDoubleConst>((double)get_value());
   // CHECK-DAG: _ZTS13MyDoubleConst=1|10
 
diff --git a/sycl/test/sub_group/barrier.cpp b/sycl/test/sub_group/barrier.cpp
index 25e31cbeb521c..b8aeefa9ca0e3 100644
--- a/sycl/test/sub_group/barrier.cpp
+++ b/sycl/test/sub_group/barrier.cpp
@@ -33,7 +33,7 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
 
       cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         size_t lid = SG.get_local_id().get(0);
         size_t gid = NdItem.get_global_id(0);
         size_t SGoff = gid - lid;
diff --git a/sycl/test/sub_group/broadcast.hpp b/sycl/test/sub_group/broadcast.hpp
index b7c6128cde0c2..3a399ecb30053 100644
--- a/sycl/test/sub_group/broadcast.hpp
+++ b/sycl/test/sub_group/broadcast.hpp
@@ -22,7 +22,7 @@ void check(queue &Queue) {
       auto syclacc = syclbuf.template get_access<access::mode::read_write>(cgh);
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         /*Broadcast GID of element with SGLID == SGID */
         syclacc[NdItem.get_global_id()] =
             broadcast(SG, T(NdItem.get_global_id(0)), SG.get_group_id());
diff --git a/sycl/test/sub_group/common.cpp b/sycl/test/sub_group/common.cpp
index 17b1a9d8166d8..41623ae2c228b 100644
--- a/sycl/test/sub_group/common.cpp
+++ b/sycl/test/sub_group/common.cpp
@@ -36,7 +36,7 @@ void check(queue &Queue, unsigned int G, unsigned int L) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
       auto syclacc = syclbuf.get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<class sycl_subgr>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         syclacc[NdItem.get_global_id()].local_id = SG.get_local_id().get(0);
         syclacc[NdItem.get_global_id()].local_range =
             SG.get_local_range().get(0);
diff --git a/sycl/test/sub_group/common_ocl.cpp b/sycl/test/sub_group/common_ocl.cpp
index 232e6c6c11acc..6a1f47aa3acc3 100644
--- a/sycl/test/sub_group/common_ocl.cpp
+++ b/sycl/test/sub_group/common_ocl.cpp
@@ -64,7 +64,7 @@ void check(queue &Queue, const int G, const int L, const char *SpvFile) {
     Queue.submit([&](handler &cgh) {
       auto syclacc = syclbuf.get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<class sycl_subgr>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         syclacc[NdItem.get_global_id()].local_id = SG.get_local_id().get(0);
         syclacc[NdItem.get_global_id()].local_range =
             SG.get_local_range().get(0);
diff --git a/sycl/test/sub_group/generic-shuffle.cpp b/sycl/test/sub_group/generic-shuffle.cpp
index d2d7e191dfa32..f0c6049d1f6b4 100644
--- a/sycl/test/sub_group/generic-shuffle.cpp
+++ b/sycl/test/sub_group/generic-shuffle.cpp
@@ -41,7 +41,7 @@ void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
 
       cgh.parallel_for<pointer_kernel<T>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         uint32_t wggid = NdItem.get_global_id(0);
         uint32_t sgid = SG.get_group_id().get(0);
         if (wggid == 0)
@@ -127,7 +127,7 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 240, size_t L = 60) {
       auto in = buf_in.template get_access<access::mode::read>(cgh);
 
       cgh.parallel_for<pointer_kernel<T>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         uint32_t wggid = NdItem.get_global_id(0);
         uint32_t sgid = SG.get_group_id().get(0);
         if (wggid == 0)
diff --git a/sycl/test/sub_group/generic_reduce.cpp b/sycl/test/sub_group/generic_reduce.cpp
index cfeea7f459b69..997a577c0d051 100644
--- a/sycl/test/sub_group/generic_reduce.cpp
+++ b/sycl/test/sub_group/generic_reduce.cpp
@@ -26,7 +26,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
       auto acc = buf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for(
           NdRange, [=](nd_item<1> NdItem) {
-            intel::sub_group sg = NdItem.get_sub_group();
+            ONEAPI::sub_group sg = NdItem.get_sub_group();
             if (skip_init) {
               acc[NdItem.get_global_id(0)] =
                   reduce(sg, T(NdItem.get_global_id(0)), op);
@@ -78,8 +78,8 @@ int main() {
   // Test user-defined type
   // Use complex as a proxy for this
   using UDT = std::complex<float>;
-  check_op<UDT>(Queue, UDT(L, L), intel::plus<UDT>(), false, G, L);
-  check_op<UDT>(Queue, UDT(0, 0), intel::plus<UDT>(), true, G, L);
+  check_op<UDT>(Queue, UDT(L, L), ONEAPI::plus<UDT>(), false, G, L);
+  check_op<UDT>(Queue, UDT(0, 0), ONEAPI::plus<UDT>(), true, G, L);
 
   // Test user-defined operator
   auto UDOp = [=](const auto &lhs, const auto &rhs) { return lhs + rhs; };
diff --git a/sycl/test/sub_group/load_store.cpp b/sycl/test/sub_group/load_store.cpp
index 8366e8baca7d9..c7ccca6c2be08 100644
--- a/sycl/test/sub_group/load_store.cpp
+++ b/sycl/test/sub_group/load_store.cpp
@@ -40,7 +40,7 @@ template <typename T, int N> void check(queue &Queue) {
       accessor<T, 1, access::mode::read_write, access::target::local> LocalMem(
           {L}, cgh);
       cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         if (SG.get_group_id().get(0) % N == 0) {
           size_t SGOffset =
               SG.get_group_id().get(0) * SG.get_max_local_range().get(0);
@@ -114,7 +114,7 @@ template <typename T> void check(queue &Queue) {
       accessor<T, 1, access::mode::read_write, access::target::local> LocalMem(
           {L}, cgh);
       cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         if (NdItem.get_global_id(0) == 0)
           sgsizeacc[0] = SG.get_max_local_range()[0];
         size_t SGOffset =
diff --git a/sycl/test/sub_group/reduce.hpp b/sycl/test/sub_group/reduce.hpp
index 2fd29e30a3081..d1a35943add7f 100644
--- a/sycl/test/sub_group/reduce.hpp
+++ b/sycl/test/sub_group/reduce.hpp
@@ -26,7 +26,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
       auto acc = buf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
           NdRange, [=](nd_item<1> NdItem) {
-            intel::sub_group sg = NdItem.get_sub_group();
+            ONEAPI::sub_group sg = NdItem.get_sub_group();
             if (skip_init) {
               acc[NdItem.get_global_id(0)] =
                   reduce(sg, T(NdItem.get_global_id(0)), op);
@@ -73,23 +73,23 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
     L = 32;
   }
 
-  check_op<T>(Queue, T(L), intel::plus<T>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::plus<T>(), true, G, L);
+  check_op<T>(Queue, T(L), ONEAPI::plus<T>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::plus<T>(), true, G, L);
 
-  check_op<T>(Queue, T(0), intel::minimum<T>(), false, G, L);
-  check_op<T>(Queue, T(G), intel::minimum<T>(), true, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::minimum<T>(), false, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::minimum<T>(), true, G, L);
 
-  check_op<T>(Queue, T(G), intel::maximum<T>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::maximum<T>(), true, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::maximum<T>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::maximum<T>(), true, G, L);
 
 #if __cplusplus >= 201402L
-  check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
+  check_op<T>(Queue, T(L), ONEAPI::plus<>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::plus<>(), true, G, L);
 
-  check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
-  check_op<T>(Queue, T(G), intel::minimum<>(), true, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::minimum<>(), false, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::minimum<>(), true, G, L);
 
-  check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::maximum<>(), true, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::maximum<>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L);
 #endif
 }
diff --git a/sycl/test/sub_group/scan.hpp b/sycl/test/sub_group/scan.hpp
index ebb6abda3984d..c84a145c83273 100644
--- a/sycl/test/sub_group/scan.hpp
+++ b/sycl/test/sub_group/scan.hpp
@@ -28,7 +28,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
       auto inacc = inbuf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
           NdRange, [=](nd_item<1> NdItem) {
-            intel::sub_group sg = NdItem.get_sub_group();
+            ONEAPI::sub_group sg = NdItem.get_sub_group();
             if (skip_init) {
               exacc[NdItem.get_global_id(0)] =
                   exclusive_scan(sg, T(NdItem.get_global_id(0)), op);
@@ -81,50 +81,50 @@ void check(queue &Queue, size_t G = 120, size_t L = 60) {
     L = 32;
   }
 
-  check_op<T>(Queue, T(L), intel::plus<T>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::plus<T>(), true, G, L);
+  check_op<T>(Queue, T(L), ONEAPI::plus<T>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::plus<T>(), true, G, L);
 
-  check_op<T>(Queue, T(0), intel::minimum<T>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::minimum<T>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, std::numeric_limits<T>::infinity(), intel::minimum<T>(),
+    check_op<T>(Queue, std::numeric_limits<T>::infinity(), ONEAPI::minimum<T>(),
                 true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::max(), intel::minimum<T>(), true,
+    check_op<T>(Queue, std::numeric_limits<T>::max(), ONEAPI::minimum<T>(), true,
                 G, L);
   }
 
-  check_op<T>(Queue, T(G), intel::maximum<T>(), false, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::maximum<T>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, -std::numeric_limits<T>::infinity(), intel::maximum<T>(),
+    check_op<T>(Queue, -std::numeric_limits<T>::infinity(), ONEAPI::maximum<T>(),
                 true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::min(), intel::maximum<T>(), true,
+    check_op<T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<T>(), true,
                 G, L);
   }
 
 #if __cplusplus >= 201402L
-  check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
+  check_op<T>(Queue, T(L), ONEAPI::plus<>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::plus<>(), true, G, L);
 
-  check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::minimum<>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, std::numeric_limits<T>::infinity(), intel::minimum<>(),
+    check_op<T>(Queue, std::numeric_limits<T>::infinity(), ONEAPI::minimum<>(),
                 true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::max(), intel::minimum<>(), true,
+    check_op<T>(Queue, std::numeric_limits<T>::max(), ONEAPI::minimum<>(), true,
                 G, L);
   }
 
-  check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::maximum<>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, -std::numeric_limits<T>::infinity(), intel::maximum<>(),
+    check_op<T>(Queue, -std::numeric_limits<T>::infinity(), ONEAPI::maximum<>(),
                 true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::min(), intel::maximum<>(), true,
+    check_op<T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<>(), true,
                 G, L);
   }
 #endif
diff --git a/sycl/test/sub_group/shuffle.hpp b/sycl/test/sub_group/shuffle.hpp
index 94c82ab99c2d1..03909661d9d23 100644
--- a/sycl/test/sub_group/shuffle.hpp
+++ b/sycl/test/sub_group/shuffle.hpp
@@ -43,7 +43,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
 
       cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         uint32_t wggid = NdItem.get_global_id(0);
         uint32_t sgid = SG.get_group_id().get(0);
         vec<T, N> vwggid(wggid), vsgid(sgid);
@@ -150,7 +150,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
 
       cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         uint32_t wggid = NdItem.get_global_id(0);
         uint32_t sgid = SG.get_group_id().get(0);
         if (wggid == 0)
diff --git a/sycl/test/sub_group/vote.cpp b/sycl/test/sub_group/vote.cpp
index 382266fa412c0..1d9852e217d6f 100644
--- a/sycl/test/sub_group/vote.cpp
+++ b/sycl/test/sub_group/vote.cpp
@@ -49,7 +49,7 @@ void check(queue Queue, const int G, const int L, const int D, const int R) {
       auto sganyacc = sganybuf.get_access<access::mode::read_write>(cgh);
       auto sgallacc = sgallbuf.get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<class subgr>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         /* Set to 1 if any local ID in subgroup devided by D has remainder R */
         if (any_of(SG, SG.get_local_id().get(0) % D == R)) {
           sganyacc[NdItem.get_global_id()] = 1;

From 56bbbfa1a4ece3c9147127f1d14f3c2726cacfea Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Mon, 3 Aug 2020 13:07:07 -0400
Subject: [PATCH 02/12] Move spec constants and printf to ONEAPI::experimental

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 clang/lib/Sema/SemaSYCL.cpp                   |  3 +-
 clang/test/CodeGenSYCL/Inputs/sycl.hpp        |  2 +
 .../CodeGenSYCL/int_header_spec_const.cpp     | 20 +++++-----
 clang/test/SemaSYCL/Inputs/sycl.hpp           |  4 +-
 .../SemaSYCL/spec_const_and_accesor_crash.cpp |  4 +-
 .../tools/sycl-post-link/sc_sym_two_refs.ll   |  4 +-
 sycl/include/CL/sycl.hpp                      |  2 +-
 .../ONEAPI/{ => experimental}/builtins.hpp    |  2 +
 .../{ => experimental}/spec_constant.hpp      |  4 ++
 sycl/include/CL/sycl/program.hpp              |  8 ++--
 sycl/source/detail/program_impl.cpp           |  4 +-
 .../program_manager/program_manager.cpp       |  4 +-
 sycl/test/built-ins/printf.cpp                | 38 +++++++++---------
 sycl/test/spec_const/spec_const_hw.cpp        |  7 ++--
 sycl/test/spec_const/spec_const_neg.cpp       |  6 +--
 sycl/test/spec_const/spec_const_redefine.cpp  |  4 +-
 sycl/test/spec_const/spec_const_types.cpp     | 40 +++++++++----------
 17 files changed, 82 insertions(+), 74 deletions(-)
 rename sycl/include/CL/sycl/ONEAPI/{ => experimental}/builtins.hpp (98%)
 rename sycl/include/CL/sycl/ONEAPI/{ => experimental}/spec_constant.hpp (96%)

diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index fa5f91e1b76e0..7c59b4459e03d 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -2813,10 +2813,11 @@ bool Util::isSyclHalfType(const QualType &Ty) {
 
 bool Util::isSyclSpecConstantType(const QualType &Ty) {
   const StringRef &Name = "spec_constant";
-  std::array<DeclContextDesc, 4> Scopes = {
+  std::array<DeclContextDesc, 5> Scopes = {
       Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"},
       Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"},
       Util::DeclContextDesc{clang::Decl::Kind::Namespace, "ONEAPI"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "experimental"},
       Util::DeclContextDesc{Decl::Kind::ClassTemplateSpecialization, Name}};
   return matchQualifiedTypeName(Ty, Scopes);
 }
diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp
index 1a07d8a1a4a71..67442f29d45a3 100644
--- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp
+++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp
@@ -243,6 +243,7 @@ struct get_kernel_name_t<auto_name, Type> {
 };
 
 namespace ONEAPI {
+namespace experimental {
 template <typename T, typename ID = T>
 class spec_constant {
 public:
@@ -256,6 +257,7 @@ class spec_constant {
     return get();
   }
 };
+} // namespace experimental
 } // namespace ONEAPI
 
 #define ATTR_SYCL_KERNEL __attribute__((sycl_kernel))
diff --git a/clang/test/CodeGenSYCL/int_header_spec_const.cpp b/clang/test/CodeGenSYCL/int_header_spec_const.cpp
index d41798ceb0475..a32389ae4c492 100644
--- a/clang/test/CodeGenSYCL/int_header_spec_const.cpp
+++ b/clang/test/CodeGenSYCL/int_header_spec_const.cpp
@@ -20,18 +20,18 @@ class MyDoubleConst;
 
 int main() {
   // Create specialization constants.
-  cl::sycl::ONEAPI::spec_constant<bool, MyBoolConst> i1(false);
-  cl::sycl::ONEAPI::spec_constant<char, MyInt8Const> i8(0);
-  cl::sycl::ONEAPI::spec_constant<unsigned char, MyUInt8Const> ui8(0);
-  cl::sycl::ONEAPI::spec_constant<short, MyInt16Const> i16(0);
-  cl::sycl::ONEAPI::spec_constant<unsigned short, MyUInt16Const> ui16(0);
-  cl::sycl::ONEAPI::spec_constant<int, MyInt32Const> i32(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<bool, MyBoolConst> i1(false);
+  cl::sycl::ONEAPI::experimental::spec_constant<char, MyInt8Const> i8(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<unsigned char, MyUInt8Const> ui8(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<short, MyInt16Const> i16(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<unsigned short, MyUInt16Const> ui16(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<int, MyInt32Const> i32(0);
   // Constant used twice, but there must be single entry in the int header,
   // otherwise compilation error would be issued.
-  cl::sycl::ONEAPI::spec_constant<int, MyInt32Const> i32_1(0);
-  cl::sycl::ONEAPI::spec_constant<unsigned int, MyUInt32Const> ui32(0);
-  cl::sycl::ONEAPI::spec_constant<float, MyFloatConst> f32(0);
-  cl::sycl::ONEAPI::spec_constant<double, MyDoubleConst> f64(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<int, MyInt32Const> i32_1(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<unsigned int, MyUInt32Const> ui32(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<float, MyFloatConst> f32(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<double, MyDoubleConst> f64(0);
 
   double val;
   double *ptr = &val; // to avoid "unused" warnings
diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp
index 4b63b55e26f4d..30e71dc9e8a88 100644
--- a/clang/test/SemaSYCL/Inputs/sycl.hpp
+++ b/clang/test/SemaSYCL/Inputs/sycl.hpp
@@ -209,11 +209,11 @@ class handler {
 };
 
 namespace ONEAPI {
-
+namespace experimental {
 template <typename T, typename ID = T>
 class spec_constant {};
 } // namespace experimental
-
+} // namespace ONEAPI
 } // namespace sycl
 } // namespace cl
 
diff --git a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
index 4c147023c5d46..35cb9447f3740 100644
--- a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
+++ b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
@@ -10,9 +10,9 @@ __attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
 }
 
 int main() {
-  cl::sycl::ONEAPI::spec_constant<char, class MyInt32Const> spec_const;
+  cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const> spec_const;
   cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> accessor;
-  // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::ONEAPI::spec_constant<char, class MyInt32Const>'
+  // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const>'
   // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write>'
   kernel<class MyKernel>([spec_const, accessor]() {});
   return 0;
diff --git a/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll b/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
index 5eadf6d40bc28..185a49a0e0798 100644
--- a/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
+++ b/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
 target triple = "spir64-unknown-unknown-sycldevice"
 
-%"sycl::experimental::spec_constant" = type { i8 }
+%"sycl::ONEAPI::experimental::spec_constant" = type { i8 }
 
 @SCSymID = private unnamed_addr constant [10 x i8] c"SpecConst\00", align 1
 ; CHECK-NOT: @SCSymID
@@ -21,7 +21,7 @@ define weak_odr dso_local spir_kernel void @Kernel() {
 }
 
 ; Function Attrs: norecurse
-define dso_local spir_func float @foo_float(%"sycl::experimental::spec_constant" addrspace(4)* nocapture readnone dereferenceable(1) %0) local_unnamed_addr #3 {
+define dso_local spir_func float @foo_float(%"sycl::ONEAPI::experimental::spec_constant" addrspace(4)* nocapture readnone dereferenceable(1) %0) local_unnamed_addr #3 {
   %2 = tail call spir_func float @_Z27__sycl_getSpecConstantValueIfET_PKc(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([10 x i8], [10 x i8]* @SCSymID, i64 0, i64 0) to i8 addrspace(4)*))
   ret float %2
 }
diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp
index aca073b79db6d..426ce0c4eed69 100644
--- a/sycl/include/CL/sycl.hpp
+++ b/sycl/include/CL/sycl.hpp
@@ -28,7 +28,7 @@
 #include <CL/sycl/nd_item.hpp>
 #include <CL/sycl/nd_range.hpp>
 #include <CL/sycl/ONEAPI/atomic.hpp>
-#include <CL/sycl/ONEAPI/builtins.hpp>
+#include <CL/sycl/ONEAPI/experimental/builtins.hpp>
 #include <CL/sycl/ONEAPI/function_pointer.hpp>
 #include <CL/sycl/ONEAPI/group_algorithm.hpp>
 #include <CL/sycl/ONEAPI/reduction.hpp>
diff --git a/sycl/include/CL/sycl/ONEAPI/builtins.hpp b/sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp
similarity index 98%
rename from sycl/include/CL/sycl/ONEAPI/builtins.hpp
rename to sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp
index 50ec284a70d39..e5b047e29c7a0 100644
--- a/sycl/include/CL/sycl/ONEAPI/builtins.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp
@@ -19,6 +19,7 @@
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 namespace ONEAPI {
+namespace experimental {
 
 // Provides functionality to print data from kernels in a C way:
 // - On non-host devices this function is directly mapped to printf from
@@ -66,6 +67,7 @@ int printf(const CONSTANT_AS char *__format, Args... args) {
 #endif
 }
 
+} // namespace experimental
 } // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/ONEAPI/spec_constant.hpp b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp
similarity index 96%
rename from sycl/include/CL/sycl/ONEAPI/spec_constant.hpp
rename to sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp
index ee10513b67ad9..276bdeae206e5 100644
--- a/sycl/include/CL/sycl/ONEAPI/spec_constant.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp
@@ -22,7 +22,10 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
+class program;
+
 namespace ONEAPI {
+namespace experimental {
 
 class spec_const_error : public compile_program_error {
   using compile_program_error::compile_program_error;
@@ -56,6 +59,7 @@ template <typename T, typename ID = T> class spec_constant {
   }
 };
 
+} // namespace experimental
 } // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/program.hpp b/sycl/include/CL/sycl/program.hpp
index 252776ed83ee4..78ce15dc7fa64 100644
--- a/sycl/include/CL/sycl/program.hpp
+++ b/sycl/include/CL/sycl/program.hpp
@@ -8,13 +8,13 @@
 
 #pragma once
 
+#include <CL/sycl/ONEAPI/experimental/spec_constant.hpp>
 #include <CL/sycl/context.hpp>
 #include <CL/sycl/detail/export.hpp>
 #include <CL/sycl/detail/kernel_desc.hpp>
 #include <CL/sycl/detail/os_util.hpp>
 #include <CL/sycl/info/info_desc.hpp>
 #include <CL/sycl/kernel.hpp>
-#include <CL/sycl/ONEAPI/spec_constant.hpp>
 #include <CL/sycl/stl.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
@@ -307,7 +307,7 @@ class __SYCL_EXPORT program {
   /// \return a specialization constant instance corresponding to given type ID
   ///         passed as a template parameter
   template <typename ID, typename T>
-  ONEAPI::spec_constant<T, ID> set_spec_constant(T Cst) {
+  ONEAPI::experimental::spec_constant<T, ID> set_spec_constant(T Cst) {
     constexpr const char *Name = detail::SpecConstantInfo<ID>::getName();
     static_assert(std::is_integral<T>::value ||
                       std::is_floating_point<T>::value,
@@ -315,10 +315,10 @@ class __SYCL_EXPORT program {
 #ifdef __SYCL_DEVICE_ONLY__
     (void)Cst;
     (void)Name;
-    return ONEAPI::spec_constant<T, ID>();
+    return ONEAPI::experimental::spec_constant<T, ID>();
 #else
     set_spec_constant_impl(Name, &Cst, sizeof(T));
-    return ONEAPI::spec_constant<T, ID>(Cst);
+    return ONEAPI::experimental::spec_constant<T, ID>(Cst);
 #endif // __SYCL_DEVICE_ONLY__
   }
 
diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp
index f1a73b567c816..8453584aa067c 100644
--- a/sycl/source/detail/program_impl.cpp
+++ b/sycl/source/detail/program_impl.cpp
@@ -484,8 +484,8 @@ vector_class<device> program_impl::get_info<info::program::devices>() const {
 void program_impl::set_spec_constant_impl(const char *Name, const void *ValAddr,
                                           size_t ValSize) {
   if (MState != program_state::none)
-    throw cl::sycl::ONEAPI::spec_const_error("Invalid program state",
-                                                   PI_INVALID_PROGRAM);
+    throw cl::sycl::ONEAPI::experimental::spec_const_error(
+        "Invalid program state", PI_INVALID_PROGRAM);
   // Reuse cached programs lock as opposed to introducing a new lock.
   auto LockGuard = MContext->getKernelProgramCache().acquireCachedPrograms();
   spec_constant_impl &SC = SpecConstRegistry[Name];
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index ca175f0546c31..64b457de96d18 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <CL/sycl/ONEAPI/experimental/spec_constant.hpp>
 #include <CL/sycl/backend_types.hpp>
 #include <CL/sycl/context.hpp>
 #include <CL/sycl/detail/common.hpp>
@@ -14,7 +15,6 @@
 #include <CL/sycl/detail/util.hpp>
 #include <CL/sycl/device.hpp>
 #include <CL/sycl/exception.hpp>
-#include <CL/sycl/ONEAPI/spec_constant.hpp>
 #include <CL/sycl/stl.hpp>
 #include <detail/context_impl.hpp>
 #include <detail/device_impl.hpp>
@@ -986,7 +986,7 @@ void ProgramManager::flushSpecConstants(const program_impl &Prg,
       auto LockGuard = Ctx->getKernelProgramCache().acquireCachedPrograms();
       auto It = NativePrograms.find(NativePrg);
       if (It == NativePrograms.end())
-        throw sycl::ONEAPI::spec_const_error(
+        throw sycl::ONEAPI::experimental::spec_const_error(
             "spec constant is set in a program w/o a binary image",
             PI_INVALID_OPERATION);
       Img = It->second;
diff --git a/sycl/test/built-ins/printf.cpp b/sycl/test/built-ins/printf.cpp
index 768862f40cfdb..1693ed97395ab 100644
--- a/sycl/test/built-ins/printf.cpp
+++ b/sycl/test/built-ins/printf.cpp
@@ -41,7 +41,7 @@ int main() {
     Queue.submit([&](handler &CGH) {
       CGH.single_task<class integral>([=]() {
         // String
-        ONEAPI::printf(format_hello_world);
+        ONEAPI::experimental::printf(format_hello_world);
         // Due to a bug in Intel CPU Runtime for OpenCL on Windows, information
         // printed using such format strings (without %-specifiers) might
         // appear in different order if output is redirected to a file or
@@ -50,8 +50,8 @@ int main() {
         // CHECK: {{(Hello, World!)?}}
 
         // Integral types
-        ONEAPI::printf(format_int, (int32_t)123);
-        ONEAPI::printf(format_int, (int32_t)-123);
+        ONEAPI::experimental::printf(format_int, (int32_t)123);
+        ONEAPI::experimental::printf(format_int, (int32_t)-123);
         // CHECK: 123
         // CHECK-NEXT: -123
 
@@ -60,8 +60,8 @@ int main() {
           // You can declare format string in non-global scope, but in this case
           // static keyword is required
           static const CONSTANT char format[] = "%f\n";
-          ONEAPI::printf(format, 33.4f);
-          ONEAPI::printf(format, -33.4f);
+          ONEAPI::experimental::printf(format, 33.4f);
+          ONEAPI::experimental::printf(format, -33.4f);
         }
         // CHECK-NEXT: 33.4
         // CHECK-NEXT: -33.4
@@ -73,23 +73,23 @@ int main() {
         using ocl_int4 = cl::sycl::vec<int, 4>::vector_t;
         {
           static const CONSTANT char format[] = "%v4d\n";
-          ONEAPI::printf(format, (ocl_int4)v4);
+          ONEAPI::experimental::printf(format, (ocl_int4)v4);
         }
 
         // However, you are still able to print them by-element:
         {
-          ONEAPI::printf(format_vec, (int32_t)v4.w(),
-                                      (int32_t)v4.z(), (int32_t)v4.y(),
-                                      (int32_t)v4.x());
+          ONEAPI::experimental::printf(format_vec, (int32_t)v4.w(),
+                                       (int32_t)v4.z(), (int32_t)v4.y(),
+                                       (int32_t)v4.x());
         }
 #else
         // On host side you always have to print them by-element:
-        ONEAPI::printf(format_vec, (int32_t)v4.x(),
-                                    (int32_t)v4.y(), (int32_t)v4.z(),
-                                    (int32_t)v4.w());
-        ONEAPI::printf(format_vec, (int32_t)v4.w(),
-                                    (int32_t)v4.z(), (int32_t)v4.y(),
-                                    (int32_t)v4.x());
+        ONEAPI::experimental::printf(format_vec, (int32_t)v4.x(),
+                                     (int32_t)v4.y(), (int32_t)v4.z(),
+                                     (int32_t)v4.w());
+        ONEAPI::experimental::printf(format_vec, (int32_t)v4.w(),
+                                     (int32_t)v4.z(), (int32_t)v4.y(),
+                                     (int32_t)v4.x());
 #endif // __SYCL_DEVICE_ONLY__
        // CHECK-NEXT: 5,6,7,8
        // CHECK-NEXT: 8,7,6,5
@@ -100,7 +100,7 @@ int main() {
         // According to OpenCL spec, argument should be a void pointer
         {
           static const CONSTANT char format[] = "%p\n";
-          ONEAPI::printf(format, (void *)Ptr);
+          ONEAPI::experimental::printf(format, (void *)Ptr);
         }
         // CHECK-NEXT: {{(0x)?[0-9a-fA-F]+$}}
       });
@@ -111,7 +111,7 @@ int main() {
     Queue.submit([&](handler &CGH) {
       CGH.parallel_for<class stream_string>(range<1>(10), [=](id<1> i) {
         // cast to uint64_t to be sure that we pass 64-bit unsigned value
-        ONEAPI::printf(format_hello_world_2, (uint64_t)i.get(0));
+        ONEAPI::experimental::printf(format_hello_world_2, (uint64_t)i.get(0));
       });
     });
     Queue.wait();
@@ -127,8 +127,8 @@ int main() {
     // CHECK-NEXT: {{[0-9]+}}: Hello, World!
   }
 
-// FIXME: strictly check output order once the bug mentioned above is fixed
-// CHECK: {{(Hello, World!)?}}
+  // FIXME: strictly check output order once the bug mentioned above is fixed
+  // CHECK: {{(Hello, World!)?}}
 
   return 0;
 }
diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp
index eb5bdf725c63d..65f6b91d77a49 100644
--- a/sycl/test/spec_const/spec_const_hw.cpp
+++ b/sycl/test/spec_const/spec_const_hw.cpp
@@ -35,7 +35,8 @@ int val = 10;
 int get_value() { return val; }
 
 float foo(
-    const cl::sycl::ONEAPI::spec_constant<float, MyFloatConst> &f32) {
+    const cl::sycl::ONEAPI::experimental::spec_constant<float, MyFloatConst>
+        &f32) {
   return f32;
 }
 
@@ -66,10 +67,10 @@ int main(int argc, char **argv) {
   // TODO make this floating point once supported by the compiler
   float goldf = (float)get_value();
 
-  cl::sycl::ONEAPI::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
       program1.set_spec_constant<MyInt32Const>(goldi);
 
-  cl::sycl::ONEAPI::spec_constant<float, MyFloatConst> f32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<float, MyFloatConst> f32 =
       program2.set_spec_constant<MyFloatConst>(goldf);
 
   program1.build_with_kernel_type<KernelAAAi>();
diff --git a/sycl/test/spec_const/spec_const_neg.cpp b/sycl/test/spec_const/spec_const_neg.cpp
index a195b9ad59bc9..26b0a871398d5 100644
--- a/sycl/test/spec_const/spec_const_neg.cpp
+++ b/sycl/test/spec_const/spec_const_neg.cpp
@@ -45,7 +45,7 @@ int main(int argc, char **argv) {
             << "\n";
   cl::sycl::program program1(q.get_context());
 
-  cl::sycl::ONEAPI::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
       program1.set_spec_constant<MyInt32Const>(10);
 
   std::vector<int> veci(1);
@@ -56,7 +56,7 @@ int main(int argc, char **argv) {
   try {
     // This is an attempt to set a spec constant after the program has been
     // built - spec_const_error should be thrown
-    cl::sycl::ONEAPI::spec_constant<int32_t, MyInt32Const> i32 =
+    cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
         program1.set_spec_constant<MyInt32Const>(10);
 
     cl::sycl::buffer<int, 1> bufi(veci.data(), veci.size());
@@ -69,7 +69,7 @@ int main(int argc, char **argv) {
             acci[0] = i32.get();
           });
     });
-  } catch (cl::sycl::ONEAPI::spec_const_error &sc_err) {
+  } catch (cl::sycl::ONEAPI::experimental::spec_const_error &sc_err) {
     passed = true;
   } catch (cl::sycl::exception &e) {
     std::cout << "*** Exception caught: " << e.what() << "\n";
diff --git a/sycl/test/spec_const/spec_const_redefine.cpp b/sycl/test/spec_const/spec_const_redefine.cpp
index da589dba51db2..fc5e7dcb22ac1 100644
--- a/sycl/test/spec_const/spec_const_redefine.cpp
+++ b/sycl/test/spec_const/spec_const_redefine.cpp
@@ -68,9 +68,9 @@ int main(int argc, char **argv) {
   for (int i = 0; i < n_sc_sets; i++) {
     cl::sycl::program program(q.get_context());
     const int *sc_set = &sc_vals[i][0];
-    cl::sycl::ONEAPI::spec_constant<int32_t, SC0> sc0 =
+    cl::sycl::ONEAPI::experimental::spec_constant<int32_t, SC0> sc0 =
         program.set_spec_constant<SC0>(sc_set[0]);
-    cl::sycl::ONEAPI::spec_constant<int32_t, SC1> sc1 =
+    cl::sycl::ONEAPI::experimental::spec_constant<int32_t, SC1> sc1 =
         program.set_spec_constant<SC1>(sc_set[1]);
 
     program.build_with_kernel_type<KernelAAA>();
diff --git a/sycl/test/spec_const/spec_const_types.cpp b/sycl/test/spec_const/spec_const_types.cpp
index 29d13f8a4c23e..0a5178c5824eb 100644
--- a/sycl/test/spec_const/spec_const_types.cpp
+++ b/sycl/test/spec_const/spec_const_types.cpp
@@ -42,49 +42,49 @@ int main() {
   cl::sycl::program program(queue.get_context());
 
   // Create specialization constants.
-  cl::sycl::ONEAPI::spec_constant<bool, MyBoolConst> i1 =
+  cl::sycl::ONEAPI::experimental::spec_constant<bool, MyBoolConst> i1 =
       program.set_spec_constant<MyBoolConst>((bool)get_value());
   // CHECK-DAG: _ZTS11MyBoolConst=1|0
 
-  cl::sycl::ONEAPI::spec_constant<int8_t, MyInt8Const> i8 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int8_t, MyInt8Const> i8 =
       program.set_spec_constant<MyInt8Const>((int8_t)get_value());
   // CHECK-DAG: _ZTS11MyInt8Const=1|1
-  cl::sycl::ONEAPI::spec_constant<uint8_t, MyUInt8Const> ui8 =
+  cl::sycl::ONEAPI::experimental::spec_constant<uint8_t, MyUInt8Const> ui8 =
       program.set_spec_constant<MyUInt8Const>((uint8_t)get_value());
   // CHECK-DAG: _ZTS12MyUInt8Const=1|2
 
-  cl::sycl::ONEAPI::spec_constant<int16_t, MyInt16Const> i16 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int16_t, MyInt16Const> i16 =
       program.set_spec_constant<MyInt16Const>((int16_t)get_value());
   // CHECK-DAG: _ZTS12MyInt16Const=1|3
-  cl::sycl::ONEAPI::spec_constant<uint16_t, MyUInt16Const> ui16 =
+  cl::sycl::ONEAPI::experimental::spec_constant<uint16_t, MyUInt16Const> ui16 =
       program.set_spec_constant<MyUInt16Const>((uint16_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt16Const=1|4
 
-  cl::sycl::ONEAPI::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
       program.set_spec_constant<MyInt32Const>((int32_t)get_value());
   // CHECK-DAG: _ZTS12MyInt32Const=1|5
-  cl::sycl::ONEAPI::spec_constant<uint32_t, MyUInt32Const> ui32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<uint32_t, MyUInt32Const> ui32 =
       program.set_spec_constant<MyUInt32Const>((uint32_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt32Const=1|6
 
-  cl::sycl::ONEAPI::spec_constant<int64_t, MyInt64Const> i64 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int64_t, MyInt64Const> i64 =
       program.set_spec_constant<MyInt64Const>((int64_t)get_value());
   // CHECK-DAG: _ZTS12MyInt64Const=1|7
-  cl::sycl::ONEAPI::spec_constant<uint64_t, MyUInt64Const> ui64 =
+  cl::sycl::ONEAPI::experimental::spec_constant<uint64_t, MyUInt64Const> ui64 =
       program.set_spec_constant<MyUInt64Const>((uint64_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt64Const=1|8
 
 #define HALF 0 // TODO not yet supported
 #if HALF
-  cl::sycl::ONEAPI::spec_constant<cl::sycl::half, MyHalfConst> f16 =
-      program.set_spec_constant<MyHalfConst>((cl::sycl::half)get_value());
+  cl::sycl::ONEAPI::experimental::spec_constant<cl::sycl::half, MyHalfConst>
+      f16 = program.set_spec_constant<MyHalfConst>((cl::sycl::half)get_value());
 #endif
 
-  cl::sycl::ONEAPI::spec_constant<float, MyFloatConst> f32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<float, MyFloatConst> f32 =
       program.set_spec_constant<MyFloatConst>((float)get_value());
   // CHECK-DAG: _ZTS12MyFloatConst=1|9
 
-  cl::sycl::ONEAPI::spec_constant<double, MyDoubleConst> f64 =
+  cl::sycl::ONEAPI::experimental::spec_constant<double, MyDoubleConst> f64 =
       program.set_spec_constant<MyDoubleConst>((double)get_value());
   // CHECK-DAG: _ZTS13MyDoubleConst=1|10
 
@@ -97,16 +97,14 @@ int main() {
     queue.submit([&](cl::sycl::handler &cgh) {
       auto acc = buf.get_access<cl::sycl::access::mode::write>(cgh);
       cgh.single_task<SpecializedKernel>(
-          program.get_kernel<SpecializedKernel>(),
-          [=]() { acc[0] = i1.get() +
-                           i8.get() + ui8.get() +
-                           i16.get() + ui16.get() +
-                           i32.get() + ui32.get() +
-                           i64.get() + ui64.get() +
+          program.get_kernel<SpecializedKernel>(), [=]() {
+            acc[0] = i1.get() + i8.get() + ui8.get() + i16.get() + ui16.get() +
+                     i32.get() + ui32.get() + i64.get() + ui64.get() +
 #if HALF
-                           f16.get() +
+                     f16.get() +
 #endif
-                           f32.get() + f64.get(); });
+                     f32.get() + f64.get();
+          });
     });
   }
 }

From 42c6c2725e75ac983c4cadc175c7974c1f1ea2db Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Mon, 3 Aug 2020 13:55:26 -0400
Subject: [PATCH 03/12] clang-format

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 sycl/include/CL/sycl.hpp                      | 12 ++++-----
 .../CL/sycl/ONEAPI/atomic_accessor.hpp        |  2 +-
 sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp  |  2 +-
 sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp    |  2 +-
 .../CL/sycl/ONEAPI/group_algorithm.hpp        |  4 +--
 sycl/include/CL/sycl/ONEAPI/reduction.hpp     |  2 +-
 sycl/include/CL/sycl/ONEAPI/sub_group.hpp     |  2 +-
 sycl/include/CL/sycl/detail/spirv.hpp         |  2 +-
 sycl/include/CL/sycl/handler.hpp              |  6 ++---
 sycl/include/CL/sycl/nd_item.hpp              |  2 +-
 sycl/source/function_pointer.cpp              |  2 +-
 .../function-pointers/fp-as-kernel-arg.cpp    |  8 +++---
 .../pass-fp-through-buffer.cpp                |  8 +++---
 sycl/test/reduction/reduction_ctor.cpp        |  6 +++--
 sycl/test/reduction/reduction_nd_ext_type.hpp | 12 ++++++---
 sycl/test/reduction/reduction_placeholder.cpp |  3 ++-
 sycl/test/reduction/reduction_transparent.cpp |  3 ++-
 sycl/test/reduction/reduction_usm.cpp         |  6 +++--
 sycl/test/sub_group/generic_reduce.cpp        | 25 +++++++++----------
 sycl/test/sub_group/scan.hpp                  | 12 ++++-----
 20 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp
index 05ca5f180df67..c027012fea489 100644
--- a/sycl/include/CL/sycl.hpp
+++ b/sycl/include/CL/sycl.hpp
@@ -8,6 +8,12 @@
 
 #pragma once
 
+#include <CL/sycl/ONEAPI/atomic.hpp>
+#include <CL/sycl/ONEAPI/experimental/builtins.hpp>
+#include <CL/sycl/ONEAPI/function_pointer.hpp>
+#include <CL/sycl/ONEAPI/group_algorithm.hpp>
+#include <CL/sycl/ONEAPI/reduction.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/accessor.hpp>
 #include <CL/sycl/atomic.hpp>
 #include <CL/sycl/backend.hpp>
@@ -27,12 +33,6 @@
 #include <CL/sycl/multi_ptr.hpp>
 #include <CL/sycl/nd_item.hpp>
 #include <CL/sycl/nd_range.hpp>
-#include <CL/sycl/ONEAPI/atomic.hpp>
-#include <CL/sycl/ONEAPI/experimental/builtins.hpp>
-#include <CL/sycl/ONEAPI/function_pointer.hpp>
-#include <CL/sycl/ONEAPI/group_algorithm.hpp>
-#include <CL/sycl/ONEAPI/reduction.hpp>
-#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/pipes.hpp>
 #include <CL/sycl/platform.hpp>
 #include <CL/sycl/pointers.hpp>
diff --git a/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
index 60042100c859d..f9bccc9afb2bd 100644
--- a/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
@@ -8,9 +8,9 @@
 
 #pragma once
 
-#include <CL/sycl/access/access.hpp>
 #include <CL/sycl/ONEAPI/atomic_enums.hpp>
 #include <CL/sycl/ONEAPI/atomic_ref.hpp>
+#include <CL/sycl/access/access.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
diff --git a/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
index 4fd306192a4df..76352df6faed9 100644
--- a/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
@@ -9,8 +9,8 @@
 #pragma once
 
 #include <CL/__spirv/spirv_ops.hpp>
-#include <CL/sycl/detail/spirv.hpp>
 #include <CL/sycl/ONEAPI/atomic_enums.hpp>
+#include <CL/sycl/detail/spirv.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <atomic>
diff --git a/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
index 05909cc5d4fda..5031897a366bf 100644
--- a/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
@@ -9,11 +9,11 @@
 #pragma once
 
 #include <CL/__spirv/spirv_ops.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/atomic.hpp>
 #include <CL/sycl/detail/defines.hpp>
 #include <CL/sycl/detail/helpers.hpp>
-#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <atomic>
diff --git a/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
index 0b606bc1600bf..32b4644e1a08c 100644
--- a/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
@@ -10,11 +10,11 @@
 #include <CL/__spirv/spirv_ops.hpp>
 #include <CL/__spirv/spirv_types.hpp>
 #include <CL/__spirv/spirv_vars.hpp>
+#include <CL/sycl/ONEAPI/functional.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/detail/spirv.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
 #include <CL/sycl/group.hpp>
-#include <CL/sycl/ONEAPI/functional.hpp>
-#include <CL/sycl/ONEAPI/sub_group.hpp>
 
 #ifndef __DISABLE_SYCL_ONEAPI_GROUP_ALGORITHMS__
 __SYCL_INLINE_NAMESPACE(cl) {
diff --git a/sycl/include/CL/sycl/ONEAPI/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
index d769d448bf561..e33db4783156b 100644
--- a/sycl/include/CL/sycl/ONEAPI/reduction.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
@@ -8,9 +8,9 @@
 
 #pragma once
 
+#include <CL/sycl/ONEAPI/group_algorithm.hpp>
 #include <CL/sycl/accessor.hpp>
 #include <CL/sycl/handler.hpp>
-#include <CL/sycl/ONEAPI/group_algorithm.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
diff --git a/sycl/include/CL/sycl/ONEAPI/sub_group.hpp b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp
index 513635f8ee95b..6a46a031e17a2 100644
--- a/sycl/include/CL/sycl/ONEAPI/sub_group.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp
@@ -10,6 +10,7 @@
 
 #include <CL/__spirv/spirv_ops.hpp>
 #include <CL/__spirv/spirv_vars.hpp>
+#include <CL/sycl/ONEAPI/functional.hpp>
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/detail/defines.hpp>
 #include <CL/sycl/detail/generic_type_traits.hpp>
@@ -17,7 +18,6 @@
 #include <CL/sycl/detail/spirv.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
 #include <CL/sycl/id.hpp>
-#include <CL/sycl/ONEAPI/functional.hpp>
 #include <CL/sycl/range.hpp>
 #include <CL/sycl/types.hpp>
 
diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp
index 9e0f92b1c5965..444cb8ce27dd4 100644
--- a/sycl/include/CL/sycl/detail/spirv.hpp
+++ b/sycl/include/CL/sycl/detail/spirv.hpp
@@ -10,9 +10,9 @@
 #include <CL/__spirv/spirv_ops.hpp>
 #include <CL/__spirv/spirv_types.hpp>
 #include <CL/__spirv/spirv_vars.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 #include <CL/sycl/detail/generic_type_traits.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
-#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 
 #ifdef __SYCL_DEVICE_ONLY__
 __SYCL_INLINE_NAMESPACE(cl) {
diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp
index 6eff9c0144d21..8ac084ef73e4a 100644
--- a/sycl/include/CL/sycl/handler.hpp
+++ b/sycl/include/CL/sycl/handler.hpp
@@ -1004,7 +1004,7 @@ class __SYCL_EXPORT handler {
                       Reduction::has_fast_atomics && !Reduction::is_usm>
   parallel_for(nd_range<Dims> Range, Reduction Redu, KernelType KernelFunc) {
     ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
-                                          Redu.getUserAccessor());
+                                           Redu.getUserAccessor());
   }
 
   /// Implements parallel_for() accepting nd_range and 1 reduction variable
@@ -1017,7 +1017,7 @@ class __SYCL_EXPORT handler {
                       Reduction::has_fast_atomics && Reduction::is_usm>
   parallel_for(nd_range<Dims> Range, Reduction Redu, KernelType KernelFunc) {
     ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
-                                          Redu.getUSMPointer());
+                                           Redu.getUSMPointer());
   }
 
   /// Implements parallel_for() accepting nd_range and 1 reduction variable
@@ -1038,7 +1038,7 @@ class __SYCL_EXPORT handler {
     shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
     auto RWAcc = Redu.getReadWriteScalarAcc(*this);
     ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
-                                          RWAcc);
+                                           RWAcc);
     this->finalize();
 
     // Copy from RWAcc to user's reduction accessor.
diff --git a/sycl/include/CL/sycl/nd_item.hpp b/sycl/include/CL/sycl/nd_item.hpp
index 37c61b03525d3..6fa2babf2c95f 100644
--- a/sycl/include/CL/sycl/nd_item.hpp
+++ b/sycl/include/CL/sycl/nd_item.hpp
@@ -9,6 +9,7 @@
 #pragma once
 
 #include <CL/__spirv/spirv_ops.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/detail/defines.hpp>
 #include <CL/sycl/detail/helpers.hpp>
@@ -16,7 +17,6 @@
 #include <CL/sycl/id.hpp>
 #include <CL/sycl/item.hpp>
 #include <CL/sycl/nd_range.hpp>
-#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/range.hpp>
 
 #include <cstddef>
diff --git a/sycl/source/function_pointer.cpp b/sycl/source/function_pointer.cpp
index cf67d32afd9f1..da4712f4abe25 100644
--- a/sycl/source/function_pointer.cpp
+++ b/sycl/source/function_pointer.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <detail/device_impl.hpp>
 #include <CL/sycl/ONEAPI/function_pointer.hpp>
+#include <detail/device_impl.hpp>
 #include <detail/program_impl.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
diff --git a/sycl/test/function-pointers/fp-as-kernel-arg.cpp b/sycl/test/function-pointers/fp-as-kernel-arg.cpp
index 66da8a4a640cd..0c34ccc5cc607 100644
--- a/sycl/test/function-pointers/fp-as-kernel-arg.cpp
+++ b/sycl/test/function-pointers/fp-as-kernel-arg.cpp
@@ -54,10 +54,10 @@ int main() {
     auto AccB = BufB.template get_access<cl::sycl::access::mode::read>(CGH);
     CGH.parallel_for<class K>(
         KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) {
-      auto Fptr =
-          cl::sycl::ONEAPI::to_device_func_ptr<decltype(add)>(FptrStorage);
-      AccA[Index] = Fptr(AccA[Index], AccB[Index]);
-    });
+          auto Fptr =
+              cl::sycl::ONEAPI::to_device_func_ptr<decltype(add)>(FptrStorage);
+          AccA[Index] = Fptr(AccA[Index], AccB[Index]);
+        });
   });
 
   auto HostAcc = BufA.get_access<cl::sycl::access::mode::read>();
diff --git a/sycl/test/function-pointers/pass-fp-through-buffer.cpp b/sycl/test/function-pointers/pass-fp-through-buffer.cpp
index 495513d08c87c..26685ec11a794 100644
--- a/sycl/test/function-pointers/pass-fp-through-buffer.cpp
+++ b/sycl/test/function-pointers/pass-fp-through-buffer.cpp
@@ -69,11 +69,11 @@ int main() {
           DispatchTable.template get_access<cl::sycl::access::mode::read>(CGH);
       CGH.parallel_for<class K>(
           KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) {
-        auto FP =
-            cl::sycl::ONEAPI::to_device_func_ptr<int(int, int)>(AccDT[Mode]);
+            auto FP = cl::sycl::ONEAPI::to_device_func_ptr<int(int, int)>(
+                AccDT[Mode]);
 
-        AccA[Index] = FP(AccA[Index], AccB[Index]);
-      });
+            AccA[Index] = FP(AccA[Index], AccB[Index]);
+          });
     });
 
     auto HostAcc = bufA.get_access<cl::sycl::access::mode::read>();
diff --git a/sycl/test/reduction/reduction_ctor.cpp b/sycl/test/reduction/reduction_ctor.cpp
index 052437130c8b3..4828b1dc94535 100644
--- a/sycl/test/reduction/reduction_ctor.cpp
+++ b/sycl/test/reduction/reduction_ctor.cpp
@@ -93,8 +93,10 @@ int main() {
   testBoth<int>(0, ONEAPI::bit_or<int>(), 1, 8);
   testBoth<int>(0, ONEAPI::bit_xor<int>(), 7, 3);
   testBoth<int>(~0, ONEAPI::bit_and<int>(), 7, 3);
-  testBoth<int>((std::numeric_limits<int>::max)(), ONEAPI::minimum<int>(), 7, 3);
-  testBoth<int>((std::numeric_limits<int>::min)(), ONEAPI::maximum<int>(), 7, 3);
+  testBoth<int>((std::numeric_limits<int>::max)(), ONEAPI::minimum<int>(), 7,
+                3);
+  testBoth<int>((std::numeric_limits<int>::min)(), ONEAPI::maximum<int>(), 7,
+                3);
 
   testBoth<float>(0, ONEAPI::plus<float>(), 1, 7);
   testBoth<float>(1, std::multiplies<float>(), 1, 7);
diff --git a/sycl/test/reduction/reduction_nd_ext_type.hpp b/sycl/test/reduction/reduction_nd_ext_type.hpp
index 4cb182e82c7ab..f81a913837d46 100644
--- a/sycl/test/reduction/reduction_nd_ext_type.hpp
+++ b/sycl/test/reduction/reduction_nd_ext_type.hpp
@@ -66,13 +66,17 @@ int runTests(const string_class &ExtensionName) {
   test<T, 1, access::mode::read_write, std::multiplies<T>>(0, 4, 4);
   test<T, 0, access::mode::discard_write, ONEAPI::plus<T>>(0, 4, 64);
 
-  test<T, 0, access::mode::read_write, ONEAPI::minimum<T>>(getMaximumFPValue<T>(), 7, 7);
-  test<T, 1, access::mode::discard_write, ONEAPI::maximum<T>>(getMinimumFPValue<T>(), 7, 7 * 5);
+  test<T, 0, access::mode::read_write, ONEAPI::minimum<T>>(
+      getMaximumFPValue<T>(), 7, 7);
+  test<T, 1, access::mode::discard_write, ONEAPI::maximum<T>>(
+      getMinimumFPValue<T>(), 7, 7 * 5);
 
 #if __cplusplus >= 201402L
   test<T, 1, access::mode::read_write, ONEAPI::plus<>>(1, 3, 3 * 5);
-  test<T, 1, access::mode::discard_write, ONEAPI::minimum<>>(getMaximumFPValue<T>(), 3, 3);
-  test<T, 0, access::mode::discard_write, ONEAPI::maximum<>>(getMinimumFPValue<T>(), 3, 3);
+  test<T, 1, access::mode::discard_write, ONEAPI::minimum<>>(
+      getMaximumFPValue<T>(), 3, 3);
+  test<T, 0, access::mode::discard_write, ONEAPI::maximum<>>(
+      getMinimumFPValue<T>(), 3, 3);
 #endif // __cplusplus >= 201402L
 
   std::cout << "Test passed\n";
diff --git a/sycl/test/reduction/reduction_placeholder.cpp b/sycl/test/reduction/reduction_placeholder.cpp
index 03278e44939ff..77633992ea2df 100644
--- a/sycl/test/reduction/reduction_placeholder.cpp
+++ b/sycl/test/reduction/reduction_placeholder.cpp
@@ -7,7 +7,8 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and
+// barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // with reductions initialized with a placeholder accessor.
diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp
index 64eecca05663b..56e32ddfe6948 100644
--- a/sycl/test/reduction/reduction_transparent.cpp
+++ b/sycl/test/reduction/reduction_transparent.cpp
@@ -46,7 +46,8 @@ void testId(T Identity, size_t WGSize, size_t NWItems) {
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
     CGH.parallel_for<SomeIdClass<T, Dim, BinaryOperation>>(
-        NDRange, ONEAPI::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) {
+        NDRange, ONEAPI::reduction(Out, Identity, BOp),
+        [=](nd_item<1> NDIt, auto &Sum) {
           Sum.combine(In[NDIt.get_global_linear_id()]);
         });
   });
diff --git a/sycl/test/reduction/reduction_usm.cpp b/sycl/test/reduction/reduction_usm.cpp
index 54203771cc73e..0ada4c515b615 100644
--- a/sycl/test/reduction/reduction_usm.cpp
+++ b/sycl/test/reduction/reduction_usm.cpp
@@ -7,7 +7,8 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and
+// barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // with reductions initialized with USM var.
@@ -113,7 +114,8 @@ int main() {
   testUSM<int, 1, ONEAPI::bit_or<int>>(0, 4, 128);
 
   // fast reduce
-  testUSM<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 5, 5 * 7);
+  testUSM<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 5,
+                                            5 * 7);
   testUSM<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
 
   // generic algorithm
diff --git a/sycl/test/sub_group/generic_reduce.cpp b/sycl/test/sub_group/generic_reduce.cpp
index 997a577c0d051..fdad3289ec10e 100644
--- a/sycl/test/sub_group/generic_reduce.cpp
+++ b/sycl/test/sub_group/generic_reduce.cpp
@@ -24,19 +24,18 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
     Queue.submit([&](handler &cgh) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
       auto acc = buf.template get_access<access::mode::read_write>(cgh);
-      cgh.parallel_for(
-          NdRange, [=](nd_item<1> NdItem) {
-            ONEAPI::sub_group sg = NdItem.get_sub_group();
-            if (skip_init) {
-              acc[NdItem.get_global_id(0)] =
-                  reduce(sg, T(NdItem.get_global_id(0)), op);
-            } else {
-              acc[NdItem.get_global_id(0)] =
-                  reduce(sg, T(NdItem.get_global_id(0)), init, op);
-            }
-            if (NdItem.get_global_id(0) == 0)
-              sgsizeacc[0] = sg.get_max_local_range()[0];
-          });
+      cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) {
+        ONEAPI::sub_group sg = NdItem.get_sub_group();
+        if (skip_init) {
+          acc[NdItem.get_global_id(0)] =
+              reduce(sg, T(NdItem.get_global_id(0)), op);
+        } else {
+          acc[NdItem.get_global_id(0)] =
+              reduce(sg, T(NdItem.get_global_id(0)), init, op);
+        }
+        if (NdItem.get_global_id(0) == 0)
+          sgsizeacc[0] = sg.get_max_local_range()[0];
+      });
     });
     auto acc = buf.template get_access<access::mode::read_write>();
     auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
diff --git a/sycl/test/sub_group/scan.hpp b/sycl/test/sub_group/scan.hpp
index c84a145c83273..dbf93553172e8 100644
--- a/sycl/test/sub_group/scan.hpp
+++ b/sycl/test/sub_group/scan.hpp
@@ -90,18 +90,18 @@ void check(queue &Queue, size_t G = 120, size_t L = 60) {
     check_op<T>(Queue, std::numeric_limits<T>::infinity(), ONEAPI::minimum<T>(),
                 true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::max(), ONEAPI::minimum<T>(), true,
-                G, L);
+    check_op<T>(Queue, std::numeric_limits<T>::max(), ONEAPI::minimum<T>(),
+                true, G, L);
   }
 
   check_op<T>(Queue, T(G), ONEAPI::maximum<T>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, -std::numeric_limits<T>::infinity(), ONEAPI::maximum<T>(),
-                true, G, L);
+    check_op<T>(Queue, -std::numeric_limits<T>::infinity(),
+                ONEAPI::maximum<T>(), true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<T>(), true,
-                G, L);
+    check_op<T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<T>(),
+                true, G, L);
   }
 
 #if __cplusplus >= 201402L

From c7ce1d5defcd83b783d41b4c54ba870eae13df27 Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Mon, 3 Aug 2020 14:33:22 -0400
Subject: [PATCH 04/12] Fix include hell and a dangling namespace

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp | 1 +
 sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp      | 2 +-
 sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp | 1 +
 sycl/include/CL/sycl/detail/spirv.hpp           | 4 +++-
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
index f9bccc9afb2bd..8aa8db2d75425 100644
--- a/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
@@ -11,6 +11,7 @@
 #include <CL/sycl/ONEAPI/atomic_enums.hpp>
 #include <CL/sycl/ONEAPI/atomic_ref.hpp>
 #include <CL/sycl/access/access.hpp>
+#include <CL/sycl/accessor.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
diff --git a/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
index 5031897a366bf..f9f8908e32543 100644
--- a/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
@@ -13,7 +13,7 @@
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/atomic.hpp>
 #include <CL/sycl/detail/defines.hpp>
-#include <CL/sycl/detail/helpers.hpp>
+#include <CL/sycl/detail/spirv.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <atomic>
diff --git a/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
index 32b4644e1a08c..66288093c71d7 100644
--- a/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
@@ -15,6 +15,7 @@
 #include <CL/sycl/detail/spirv.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
 #include <CL/sycl/group.hpp>
+#include <CL/sycl/nd_item.hpp>
 
 #ifndef __DISABLE_SYCL_ONEAPI_GROUP_ALGORITHMS__
 __SYCL_INLINE_NAMESPACE(cl) {
diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp
index 444cb8ce27dd4..95959c6ec7a37 100644
--- a/sycl/include/CL/sycl/detail/spirv.hpp
+++ b/sycl/include/CL/sycl/detail/spirv.hpp
@@ -12,7 +12,9 @@
 #include <CL/__spirv/spirv_vars.hpp>
 #include <CL/sycl/ONEAPI/atomic_enums.hpp>
 #include <CL/sycl/detail/generic_type_traits.hpp>
+#include <CL/sycl/detail/helpers.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
+#include <CL/sycl/id.hpp>
 
 #ifdef __SYCL_DEVICE_ONLY__
 __SYCL_INLINE_NAMESPACE(cl) {
@@ -107,7 +109,7 @@ using EnableIfGenericBroadcast = detail::enable_if_t<
 // Work-group supports any integral type
 // Sub-group currently supports only uint32_t
 template <typename Group> struct GroupId { using type = size_t; };
-template <> struct GroupId<::cl::sycl::intel::sub_group> {
+template <> struct GroupId<::cl::sycl::ONEAPI::sub_group> {
   using type = uint32_t;
 };
 template <typename Group, typename T, typename IdT>

From bd26c2113197382ec1e774b0d2b34bf2a276d8b5 Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Mon, 3 Aug 2020 14:36:52 -0400
Subject: [PATCH 05/12] clang-format

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp    | 2 +-
 sycl/include/CL/sycl/detail/spirv.hpp         | 3 ++-
 sycl/test/reduction/reduction_transparent.cpp | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
index f9f8908e32543..625a8902d9c86 100644
--- a/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
@@ -1,4 +1,4 @@
-//==----- atomic_ref.hpp - SYCL_ONEAPI_extended_atomics atomic_ref ----------==//
+//==----- atomic_ref.hpp - SYCL_ONEAPI_extended_atomics atomic_ref ---------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp
index 95959c6ec7a37..b1f86c0c7ec42 100644
--- a/sycl/include/CL/sycl/detail/spirv.hpp
+++ b/sycl/include/CL/sycl/detail/spirv.hpp
@@ -231,7 +231,8 @@ getMemorySemanticsMask(ONEAPI::memory_order Order) {
       __spv::MemorySemanticsMask::CrossWorkgroupMemory);
 }
 
-static inline constexpr __spv::Scope::Flag getScope(ONEAPI::memory_scope Scope) {
+static inline constexpr __spv::Scope::Flag
+getScope(ONEAPI::memory_scope Scope) {
   switch (Scope) {
   case ONEAPI::memory_scope::work_item:
     return __spv::Scope::Invocation;
diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp
index 56e32ddfe6948..dea789b395401 100644
--- a/sycl/test/reduction/reduction_transparent.cpp
+++ b/sycl/test/reduction/reduction_transparent.cpp
@@ -7,7 +7,8 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and
+// barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // where func is a transparent functor.

From 6308c5d7d29b371db15f962585fa0fd86c4da25e Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Mon, 3 Aug 2020 14:50:26 -0400
Subject: [PATCH 06/12] AlexeyB var name simplification

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll b/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
index 185a49a0e0798..122e4eea3fce5 100644
--- a/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
+++ b/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
 target triple = "spir64-unknown-unknown-sycldevice"
 
-%"sycl::ONEAPI::experimental::spec_constant" = type { i8 }
+%"spec_constant" = type { i8 }
 
 @SCSymID = private unnamed_addr constant [10 x i8] c"SpecConst\00", align 1
 ; CHECK-NOT: @SCSymID
@@ -21,7 +21,7 @@ define weak_odr dso_local spir_kernel void @Kernel() {
 }
 
 ; Function Attrs: norecurse
-define dso_local spir_func float @foo_float(%"sycl::ONEAPI::experimental::spec_constant" addrspace(4)* nocapture readnone dereferenceable(1) %0) local_unnamed_addr #3 {
+define dso_local spir_func float @foo_float(%"spec_constant" addrspace(4)* nocapture readnone dereferenceable(1) %0) local_unnamed_addr #3 {
   %2 = tail call spir_func float @_Z27__sycl_getSpecConstantValueIfET_PKc(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([10 x i8], [10 x i8]* @SCSymID, i64 0, i64 0) to i8 addrspace(4)*))
   ret float %2
 }

From 62075217d8c77981bb1ee8780a2cc9bdbee84fcd Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Mon, 3 Aug 2020 15:59:48 -0400
Subject: [PATCH 07/12] update dangling ns in test and clang-format

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 sycl/test/aot/spec_const_aot.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/sycl/test/aot/spec_const_aot.cpp b/sycl/test/aot/spec_const_aot.cpp
index 99b451fe6d7ca..706b618438b40 100644
--- a/sycl/test/aot/spec_const_aot.cpp
+++ b/sycl/test/aot/spec_const_aot.cpp
@@ -32,10 +32,11 @@ int main(int argc, char **argv) {
     }
   });
 
-  std::cout << "Running on " << q.get_device().get_info<info::device::name>() << "\n";
+  std::cout << "Running on " << q.get_device().get_info<info::device::name>()
+            << "\n";
   cl::sycl::program prog(q.get_context());
 
-  cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
       prog.set_spec_constant<MyInt32Const>(10);
 
   prog.build_with_kernel_type<Kernel>();
@@ -46,11 +47,8 @@ int main(int argc, char **argv) {
 
     q.submit([&](cl::sycl::handler &cgh) {
       auto acc = buf.get_access<cl::sycl::access::mode::write>(cgh);
-      cgh.single_task<Kernel>(
-          prog.get_kernel<Kernel>(),
-          [=]() {
-            acc[0] = i32.get();
-          });
+      cgh.single_task<Kernel>(prog.get_kernel<Kernel>(),
+                              [=]() { acc[0] = i32.get(); });
     });
   }
   bool passed = true;

From 2fe18fe7f883f5481fd6aceade642986156e39ec Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Tue, 4 Aug 2020 12:55:33 -0400
Subject: [PATCH 08/12] Update comment and bump ABI version

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 sycl/CMakeLists.txt                             | 2 +-
 sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt
index 1b7ae47f7b7fb..1242cc7dc50a0 100644
--- a/sycl/CMakeLists.txt
+++ b/sycl/CMakeLists.txt
@@ -14,7 +14,7 @@ include(AddSYCLExecutable)
 set(SYCL_MAJOR_VERSION 2)
 set(SYCL_MINOR_VERSION 1)
 set(SYCL_PATCH_VERSION 0)
-set(SYCL_DEV_ABI_VERSION 4)
+set(SYCL_DEV_ABI_VERSION 5)
 if (SYCL_ADD_DEV_VERSION_POSTFIX)
   set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
 endif()
diff --git a/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
index 8aa8db2d75425..ed415a90891c7 100644
--- a/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
@@ -1,4 +1,4 @@
-//==--- atomic_accessor.hpp - SYCL_INTEL_extended_atomics atomic_accessor --==//
+//==-- atomic_accessor.hpp - SYCL_ONEAPI_extended_atomics atomic_accessor --==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From 4d309217fe91239a245bdaa4c2438fe5d4c83fd8 Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Tue, 4 Aug 2020 15:44:50 -0400
Subject: [PATCH 09/12] Update spec const tests

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 clang/test/SemaSYCL/spec-const-kernel-arg.cpp | 12 ++++++------
 sycl/test/spec_const/spec_const_hw.cpp        |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/test/SemaSYCL/spec-const-kernel-arg.cpp b/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
index d40e5296949a2..d7937cdc95204 100644
--- a/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
+++ b/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
@@ -6,12 +6,12 @@
 #include <sycl.hpp>
 
 struct SpecConstantsWrapper {
-  cl::sycl::experimental::spec_constant<int, class sc_name1> SC1;
-  cl::sycl::experimental::spec_constant<int, class sc_name2> SC2;
+  cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name1> SC1;
+  cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name2> SC2;
 };
 
 int main() {
-  cl::sycl::experimental::spec_constant<char, class MyInt32Const> SC;
+  cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const> SC;
   SpecConstantsWrapper W;
   cl::sycl::kernel_single_task<class kernel_sc>(
       [=]() {
@@ -23,7 +23,7 @@ int main() {
 // CHECK: FunctionDecl {{.*}}kernel_sc{{.*}} 'void ()'
 // CHECK: VarDecl {{.*}}'(lambda at {{.*}}'
 // CHECK-NEXT: InitListExpr {{.*}}'(lambda at {{.*}}'
-// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::experimental::spec_constant<char, class MyInt32Const>':'cl::sycl::experimental::spec_constant<char, MyInt32Const>'
+// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const>':'cl::sycl::experimental::spec_constant<char, MyInt32Const>'
 // CHECK-NEXT: InitListExpr {{.*}} 'SpecConstantsWrapper'
-// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::experimental::spec_constant<int, class sc_name1>':'cl::sycl::experimental::spec_constant<int, sc_name1>'
-// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::experimental::spec_constant<int, class sc_name2>':'cl::sycl::experimental::spec_constant<int, sc_name2>'
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name1>':'cl::sycl::experimental::spec_constant<int, sc_name1>'
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name2>':'cl::sycl::experimental::spec_constant<int, sc_name2>'
diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp
index 895e62b893e6c..bc6800a942e85 100644
--- a/sycl/test/spec_const/spec_const_hw.cpp
+++ b/sycl/test/spec_const/spec_const_hw.cpp
@@ -45,8 +45,8 @@ struct SCWrapper {
       : SC1(p.set_spec_constant<class sc_name1, int>(4)),
         SC2(p.set_spec_constant<class sc_name2, int>(2)) {}
 
-  cl::sycl::experimental::spec_constant<int, class sc_name1> SC1;
-  cl::sycl::experimental::spec_constant<int, class sc_name2> SC2;
+  cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name1> SC1;
+  cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name2> SC2;
 };
 
 int main(int argc, char **argv) {

From cd13ac5ea75e2f5335e682b20e098386e2b87e3b Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Tue, 4 Aug 2020 17:14:23 -0400
Subject: [PATCH 10/12] Update group alg barrier and test

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp | 6 +++---
 sycl/test/group-algorithm/barrier.cpp           | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
index 7e6913196d439..858b68fc72f13 100644
--- a/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
@@ -81,11 +81,11 @@ template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) {
 
 // TODO: Replace with Group::fence_scope from SYCL 2020 provisional
 template <typename Group> struct FenceScope {
-  static constexpr intel::memory_scope value = intel::memory_scope::work_group;
+  static constexpr ONEAPI::memory_scope value = ONEAPI::memory_scope::work_group;
 };
 
-template <> struct FenceScope<intel::sub_group> {
-  static constexpr intel::memory_scope value = intel::memory_scope::sub_group;
+template <> struct FenceScope<ONEAPI::sub_group> {
+  static constexpr ONEAPI::memory_scope value = ONEAPI::memory_scope::sub_group;
 };
 
 template <typename T, class BinaryOperation> struct identity {};
diff --git a/sycl/test/group-algorithm/barrier.cpp b/sycl/test/group-algorithm/barrier.cpp
index e77398983d8e7..c877597c497cf 100644
--- a/sycl/test/group-algorithm/barrier.cpp
+++ b/sycl/test/group-algorithm/barrier.cpp
@@ -8,7 +8,7 @@
 #include <CL/sycl.hpp>
 #include <cassert>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 class barrier_kernel;
 

From 3231a7529a94287303236bcaca7099768a30217e Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Tue, 4 Aug 2020 17:20:52 -0400
Subject: [PATCH 11/12] clang-format

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
index 858b68fc72f13..d4e59c657810d 100644
--- a/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
@@ -81,7 +81,8 @@ template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) {
 
 // TODO: Replace with Group::fence_scope from SYCL 2020 provisional
 template <typename Group> struct FenceScope {
-  static constexpr ONEAPI::memory_scope value = ONEAPI::memory_scope::work_group;
+  static constexpr ONEAPI::memory_scope value =
+      ONEAPI::memory_scope::work_group;
 };
 
 template <> struct FenceScope<ONEAPI::sub_group> {

From 8f04df7d2fe30420f53caf12a82ebc2f845ba30b Mon Sep 17 00:00:00 2001
From: James Brodman <james.brodman@intel.com>
Date: Wed, 5 Aug 2020 14:18:16 -0400
Subject: [PATCH 12/12] Fix clang test

Signed-off-by: James Brodman <james.brodman@intel.com>
---
 clang/test/SemaSYCL/spec-const-kernel-arg.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/test/SemaSYCL/spec-const-kernel-arg.cpp b/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
index d7937cdc95204..ba2489b93fe6f 100644
--- a/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
+++ b/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
@@ -23,7 +23,7 @@ int main() {
 // CHECK: FunctionDecl {{.*}}kernel_sc{{.*}} 'void ()'
 // CHECK: VarDecl {{.*}}'(lambda at {{.*}}'
 // CHECK-NEXT: InitListExpr {{.*}}'(lambda at {{.*}}'
-// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const>':'cl::sycl::experimental::spec_constant<char, MyInt32Const>'
+// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const>':'cl::sycl::ONEAPI::experimental::spec_constant<char, MyInt32Const>'
 // CHECK-NEXT: InitListExpr {{.*}} 'SpecConstantsWrapper'
-// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name1>':'cl::sycl::experimental::spec_constant<int, sc_name1>'
-// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name2>':'cl::sycl::experimental::spec_constant<int, sc_name2>'
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name1>':'cl::sycl::ONEAPI::experimental::spec_constant<int, sc_name1>'
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name2>':'cl::sycl::ONEAPI::experimental::spec_constant<int, sc_name2>'