diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index 8db34232502f7..337e68a15776d 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -199,8 +199,8 @@ else()
 endif()
 
 # Use the current compiler target to determine the appropriate runtime to build.
-if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn|^nvptx" OR
-   "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn|^nvptx")
+if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn|^nvptx|^spirv64" OR
+   "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn|^nvptx|^spirv64")
   add_subdirectory(device)
 else()
   add_subdirectory(module)
diff --git a/openmp/device/CMakeLists.txt b/openmp/device/CMakeLists.txt
index 5722cea1bf9e2..0e0507b3d2103 100644
--- a/openmp/device/CMakeLists.txt
+++ b/openmp/device/CMakeLists.txt
@@ -87,7 +87,10 @@ if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR
 elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" OR
        "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
   set(target_name "nvptx")
-  list(APPEND compile_flags --cuda-feature=+ptx63)
+  list(APPEND compile_options --cuda-feature=+ptx63)
+elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^spirv64" OR
+       "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^spirv64")
+  set(target_name "spirv")
 endif()
 
 # Trick to combine these into a bitcode file via the linker's LTO pass.
@@ -110,8 +113,15 @@ target_include_directories(libompdevice PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/../../libc
     ${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include)
 target_compile_options(libompdevice PRIVATE ${compile_options} ${compile_flags})
-target_link_options(libompdevice PRIVATE
-                    "-flto" "-r" "-nostdlib" "-Wl,--lto-emit-llvm")
+if(NOT "${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^spirv" AND
+   NOT "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^spirv")
+  target_link_options(libompdevice PRIVATE
+                      "-flto" "-r" "-nostdlib" "-Wl,--lto-emit-llvm")
+else()
+  target_link_options(libompdevice PRIVATE
+                      "-nostdlib" "-emit-llvm")
+endif()
+
 if(LLVM_DEFAULT_TARGET_TRIPLE)
   target_link_options(libompdevice PRIVATE "--target=${LLVM_DEFAULT_TARGET_TRIPLE}")
 endif()
diff --git a/openmp/device/include/DeviceTypes.h b/openmp/device/include/DeviceTypes.h
index f39c6d7f65702..56cd7a9a92eb4 100644
--- a/openmp/device/include/DeviceTypes.h
+++ b/openmp/device/include/DeviceTypes.h
@@ -132,7 +132,17 @@ struct IdentTy {
 
 using __kmpc_impl_lanemask_t = LaneMaskTy;
 
-using ParallelRegionFnTy = void *;
+#ifdef __SPIRV__
+// Function pointers in the SPIR-V backend have a special address space (9).
+// Since function pointers are passed as regular void * pointers, it is
+// necessary to annotate them with the proper address space to avoid casting
+// errors during compilation.
+using FnPtrTy = void [[clang::address_space(9)]] *;
+#else
+using FnPtrTy = void *;
+#endif
+
+using ParallelRegionFnTy = FnPtrTy;
 
 using CriticalNameTy = int32_t[8];
 
diff --git a/openmp/device/include/State.h b/openmp/device/include/State.h
index b8a0da538d466..0273089d09261 100644
--- a/openmp/device/include/State.h
+++ b/openmp/device/include/State.h
@@ -220,7 +220,7 @@ lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
   __builtin_unreachable();
 }
 
-[[gnu::always_inline, gnu::flatten]] inline void *&
+[[gnu::always_inline, gnu::flatten]] inline FnPtrTy &
 lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
   switch (Kind) {
   case state::VK_ParallelRegionFn:
diff --git a/openmp/device/include/Synchronization.h b/openmp/device/include/Synchronization.h
index d72f0c8a1696c..32c33cb4bf246 100644
--- a/openmp/device/include/Synchronization.h
+++ b/openmp/device/include/Synchronization.h
@@ -42,7 +42,20 @@ enum MemScopeTy {
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 V inc(Ty *Address, V Val, atomic::OrderingTy Ordering,
       MemScopeTy MemScope = MemScopeTy::device) {
+#if defined(__SPIRV__)
+  uint32_t Old;
+  while (true) {
+    Old = load(Address, Ordering, MemScope);
+    if (Old >= Val) {
+      if (cas(Address, Old, 0u, Ordering, Ordering, MemScope))
+        break;
+    } else if (cas(Address, Old, Old + 1, Ordering, Ordering, MemScope))
+      break;
+  }
+  return Old;
+#else
   return __scoped_atomic_fetch_uinc(Address, Val, Ordering, MemScope);
+#endif
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
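
Note on the SPIR-V branch added to inc() above: it rebuilds the wrap-around increment that __scoped_atomic_fetch_uinc provides on the other targets, that is, return the old value, store 0 if the old value was already >= Val, otherwise store old + 1, retrying through CAS until the update lands. The following standalone host-side sketch shows the same loop with std::atomic; it is illustrative only, and the name atomicIncWrap and the acquire/acq_rel orderings are assumptions, not part of the patch.

// Host-side illustration (not part of the runtime): emulate the "unsigned
// increment with wraparound" atomic with a compare-and-swap loop, mirroring
// the SPIR-V fallback added to inc() in Synchronization.h.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

// Returns the previous counter value; stores 0 if that value was >= Bound,
// otherwise stores the previous value + 1.
static uint32_t atomicIncWrap(std::atomic<uint32_t> &Counter, uint32_t Bound) {
  uint32_t Old = Counter.load(std::memory_order_acquire);
  while (true) {
    uint32_t New = (Old >= Bound) ? 0u : Old + 1u;
    // On failure, compare_exchange_weak refreshes Old with the current value,
    // so the loop simply recomputes New and retries.
    if (Counter.compare_exchange_weak(Old, New, std::memory_order_acq_rel,
                                      std::memory_order_acquire))
      return Old;
  }
}

int main() {
  std::atomic<uint32_t> Counter{0};
  std::vector<std::thread> Threads;
  for (int I = 0; I < 8; ++I)
    Threads.emplace_back([&] {
      for (int J = 0; J < 1000; ++J)
        atomicIncWrap(Counter, /*Bound=*/7);
    });
  for (std::thread &T : Threads)
    T.join();
  std::printf("final counter value: %u\n", Counter.load()); // always in [0, 7]
  return 0;
}

Unlike the sketch, the runtime version goes through the DeviceRTL atomic::load/atomic::cas wrappers with the caller-supplied Ordering and MemScope, and it fixes the element type to uint32_t rather than using the templated type.
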
diff --git a/openmp/device/src/Allocator.cpp b/openmp/device/src/Allocator.cpp
index 24f989197a707..fdac0de32da3a 100644
--- a/openmp/device/src/Allocator.cpp
+++ b/openmp/device/src/Allocator.cpp
@@ -44,7 +44,7 @@ extern "C" {
 }
 #endif
 
-#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+#if (defined(__AMDGPU__) || defined(__SPIRV__)) && !defined(OMPTARGET_HAS_LIBC)
 [[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); }
 [[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); }
 #else
diff --git a/openmp/device/src/Parallelism.cpp b/openmp/device/src/Parallelism.cpp
index bd2c0799ee9f0..9f74990ce43ea 100644
--- a/openmp/device/src/Parallelism.cpp
+++ b/openmp/device/src/Parallelism.cpp
@@ -68,7 +68,7 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
 
 // Invoke an outlined parallel function unwrapping arguments (up to 32).
 [[clang::always_inline]] void invokeMicrotask(int32_t global_tid,
-                                              int32_t bound_tid, void *fn,
+                                              int32_t bound_tid, FnPtrTy fn,
                                               void **args, int64_t nargs) {
   switch (nargs) {
 #include "generated_microtask_cases.gen"
@@ -84,7 +84,7 @@ extern "C" {
 
 [[clang::always_inline]] void
 __kmpc_parallel_spmd(IdentTy *ident, int32_t num_threads,
-                     void *fn, void **args,
+                     FnPtrTy fn, void **args,
                      const int64_t nargs) {
   uint32_t TId = mapping::getThreadIdInBlock();
   uint32_t NumThreads = determineNumberOfThreads(num_threads);
@@ -142,8 +142,8 @@ extern "C" {
 
 [[clang::always_inline]] void
 __kmpc_parallel_60(IdentTy *ident, int32_t, int32_t if_expr,
-                   int32_t num_threads, int proc_bind, void *fn,
-                   void *wrapper_fn, void **args, int64_t nargs,
+                   int32_t num_threads, int proc_bind, FnPtrTy fn,
+                   FnPtrTy wrapper_fn, void **args, int64_t nargs,
                    int32_t nt_strict) {
 
   uint32_t TId = mapping::getThreadIdInBlock();
@@ -261,7 +261,7 @@ __kmpc_parallel_60(IdentTy *ident, int32_t, int32_t if_expr,
                                  1u, true, ident,
                                  /*ForceTeamState=*/true);
     state::ValueRAII ParallelRegionFnRAII(state::ParallelRegionFn, wrapper_fn,
-                                          (void *)nullptr, true, ident,
+                                          (FnPtrTy)nullptr, true, ident,
                                           /*ForceTeamState=*/true);
     state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true, ident,
                                      /*ForceTeamState=*/true);
diff --git a/openmp/device/src/Synchronization.cpp b/openmp/device/src/Synchronization.cpp
index af522bf66b35a..88de9442a558c 100644
--- a/openmp/device/src/Synchronization.cpp
+++ b/openmp/device/src/Synchronization.cpp
@@ -232,6 +232,37 @@ void setCriticalLock(omp_lock_t *Lock) { setLock(Lock); }
 #endif
 ///}
 
+#if defined(__SPIRV__)
+void namedBarrierInit() { __builtin_trap(); } // TODO
+void namedBarrier() { __builtin_trap(); } // TODO
+
+void unsetLock(omp_lock_t *Lock) {
+  atomic::store((int32_t *)Lock, 0, atomic::seq_cst);
+}
+int testLock(omp_lock_t *Lock) {
+  return atomic::add((int32_t *)Lock, 0, atomic::seq_cst);
+}
+void initLock(omp_lock_t *Lock) { unsetLock(Lock); }
+void destroyLock(omp_lock_t *Lock) { unsetLock(Lock); }
+void setLock(omp_lock_t *Lock) {
+  int32_t *LockPtr = (int32_t *)Lock;
+  bool Acquired = false;
+  int32_t Expected;
+  while (!Acquired) {
+    Expected = 0;
+    if (Expected == atomic::load(LockPtr, atomic::seq_cst))
+      Acquired =
+          atomic::cas(LockPtr, Expected, 1, atomic::seq_cst, atomic::seq_cst);
+  }
+}
+
+void unsetCriticalLock(omp_lock_t *Lock) { unsetLock(Lock); }
+void setCriticalLock(omp_lock_t *Lock) { setLock(Lock); }
+void syncThreadsAligned(atomic::OrderingTy Ordering) {
+  synchronize::threads(Ordering);
+}
+#endif
+
 } // namespace impl
 
 void synchronize::init(bool IsSPMD) {
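
Note on the SPIR-V lock routines added to Synchronization.cpp: they treat omp_lock_t as a 32-bit word where 0 means unlocked and 1 means locked, setLock spins on a plain load and only attempts a CAS after observing 0, and unsetLock is a plain store of 0, all with seq_cst ordering. Below is a minimal host-side sketch of that test-and-test-and-set pattern, assuming std::atomic in place of the runtime's atomic:: helpers; SpinLock and its members are illustrative names, and the weaker acquire/release orderings differ from the seq_cst used in the patch.

// Host-side illustration (not part of the runtime): a test-and-test-and-set
// spin lock with the same shape as the SPIR-V setLock/unsetLock pair.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

struct SpinLock {
  std::atomic<int32_t> State{0}; // 0 = unlocked, 1 = locked

  void lock() {
    for (;;) {
      // Spin on a plain load first so waiting threads do not hammer the cache
      // line with failed CAS attempts; only try the CAS after observing 0.
      if (State.load(std::memory_order_relaxed) == 0) {
        int32_t Expected = 0;
        if (State.compare_exchange_weak(Expected, 1, std::memory_order_acquire,
                                        std::memory_order_relaxed))
          return;
      }
    }
  }

  void unlock() { State.store(0, std::memory_order_release); }
};

int main() {
  SpinLock Lock;
  long Counter = 0;
  std::vector<std::thread> Threads;
  for (int I = 0; I < 4; ++I)
    Threads.emplace_back([&] {
      for (int J = 0; J < 100000; ++J) {
        Lock.lock();
        ++Counter; // serialized by the lock
        Lock.unlock();
      }
    });
  for (std::thread &T : Threads)
    T.join();
  std::printf("counter = %ld (expected %d)\n", Counter, 4 * 100000);
  return 0;
}

In the patch itself, testLock reads the word via atomic::add of 0, so a nonzero result reports the lock as held, and the namedBarrier variants remain stubbed out with __builtin_trap, as the TODO comments note.
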