intel · vladimirlaz · Jul 31, 2020 · Jul 20, 2020 · Jul 20, 2020 · Jul 20, 2020
@@ -1,4 +1,4 @@
-// REQUIRES: opencl
+// REQUIRES: opencl || level0
 
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out

@@ -1,5 +1,3 @@
-// REQUIRES: opencl
-
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
@@ -18,14 +16,6 @@ using namespace cl::sycl;
 
 const int dataSize = 32;
 
-bool isQueueInOrder(cl_command_queue cq) {
-  cl_command_queue_properties reportedProps;
-  cl_int iRet = clGetCommandQueueInfo(
-      cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr);
-  assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
-  return (!(reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
-}
-
 int main() {
   int dataA[dataSize] = {0};
   int dataB[dataSize] = {0};
@@ -63,18 +53,6 @@ int main() {
       cgh.parallel_for<class ordered_reader>(myRange, myKernel);
     });
 
-    bool result = true;
-    cl_command_queue cq = Queue.get();
-    device dev = Queue.get_device();
-    bool expected_result = dev.is_host() ? true : isQueueInOrder(cq);
-
-    if (expected_result != result) {
-      std::cout << "Resulting queue order is OOO but expected order is inorder"
-                << std::endl;
-
-      return -1;
-    }
-
     auto readBufferB = bufB.get_access<access::mode::read>();
     for (size_t i = 0; i != dataSize; ++i) {
       if (readBufferB[i] != i) {

@@ -0,0 +1,51 @@
+// REQUIRES: opencl
+
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+//==-------- ordered_buffs.cpp - SYCL buffers in ordered queues test--------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <CL/sycl.hpp>
+#include <iostream>
+
+using namespace cl::sycl;
+
+const int dataSize = 32;
+
+bool isQueueInOrder(cl_command_queue cq) {
+  cl_command_queue_properties reportedProps;
+  cl_int iRet = clGetCommandQueueInfo(
+      cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr);
+  assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
+  return (!(reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
+}
+
+int main() {
+  int dataA[dataSize] = {0};
+  int dataB[dataSize] = {0};
+
+  {
+    queue Queue{property::queue::in_order()};
+
+    bool result = true;
+    cl_command_queue cq = Queue.get();
+    device dev = Queue.get_device();
+    bool expected_result = dev.is_host() ? true : isQueueInOrder(cq);
+
+    if (expected_result != result) {
+      std::cout << "Resulting queue order is OOO but expected order is inorder"
+                << std::endl;
+
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
@@ -1,5 +1,3 @@
-// REQUIRES: opencl
-
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out -L %opencl_libs_dir -lOpenCL
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
@@ -19,15 +17,6 @@ using namespace cl::sycl;
 
 constexpr int numNodes = 4;
 
-bool getQueueOrder(cl_command_queue cq) {
-  cl_command_queue_properties reportedProps;
-  cl_int iRet = clGetCommandQueueInfo(
-      cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr);
-  assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
-  return (reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? false
-                                                                  : true;
-}
-
 struct Node {
   Node() : pNext(nullptr), Num(0xDEADBEEF) {}
 
@@ -103,15 +92,5 @@ int main() {
     d_cur = h_cur.pNext;
   }
 
-  bool result = true;
-  cl_command_queue cq = q.get(); 
-  bool expected_result = dev.is_host() ? true : getQueueOrder(cq);
-  if (expected_result != result) {
-    std::cout << "Resulting queue order is OOO but expected order is inorder"
-              << std::endl;
-
-    return -1;
-  }
-
   return 0;
 }
@@ -0,0 +1,46 @@
+// REQUIRES: opencl
+
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out -L %opencl_libs_dir -lOpenCL
+// RUN: %CPU_RUN_PLACEHOLDER %t1.out
+// RUN: %GPU_RUN_PLACEHOLDER %t1.out
+//==----------- ordered_dmemll.cpp - Device Memory Linked List test --------==//
+// It uses an ordered queue where explicit waiting is not necessary between
+// kernels
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+
+using namespace cl::sycl;
+
+constexpr int numNodes = 4;
+
+bool getQueueOrder(cl_command_queue cq) {
+  cl_command_queue_properties reportedProps;
+  cl_int iRet = clGetCommandQueueInfo(
+      cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr);
+  assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
+  return (reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? false
+                                                                  : true;
+}
+
+int main() {
+  queue q{property::queue::in_order()};
+  auto dev = q.get_device();
+
+  bool result = true;
+  cl_command_queue cq = q.get(); 
+  bool expected_result = dev.is_host() ? true : getQueueOrder(cq);
+  if (expected_result != result) {
+    std::cout << "Resulting queue order is OOO but expected order is inorder"
+              << std::endl;
+
+    return -1;
+  }
+
+  return 0;
+}
@@ -0,0 +1,205 @@
+// REQUIRES: opencl
+
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
+// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUNx: %GPU_RUN_PLACEHOLDER %t.out
+// RUNx: %ACC_RUN_PLACEHOLDER %t.out
+
+//==--- kernel-and-program.cpp - SYCL kernel/program test ------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <utility>
+
+int main() {
+  // Single task invocation methods
+  {
+    cl::sycl::queue q;
+    int data = 0;
+
+    // OpenCL interoperability kernel invocation
+    if (!q.is_host()) {
+      {
+        cl_int err;
+        cl::sycl::context ctx = q.get_context();
+        cl_context clCtx = ctx.get();
+        cl_command_queue clQ = q.get();
+        cl_mem clBuffer =
+            clCreateBuffer(clCtx, CL_MEM_WRITE_ONLY, sizeof(int), NULL, NULL);
+        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int),
+                                   &data, 0, NULL, NULL);
+        assert(err == CL_SUCCESS);
+        clFinish(clQ);
+        cl::sycl::program prog(ctx);
+        prog.build_with_source(
+            "kernel void SingleTask(global int* a) {*a+=1; }\n");
+        q.submit([&](cl::sycl::handler &cgh) {
+          cgh.set_args(clBuffer);
+          cgh.single_task(prog.get_kernel("SingleTask"));
+        });
+        q.wait();
+        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int), &data,
+                                  0, NULL, NULL);
+        clReleaseCommandQueue(clQ);
+        clReleaseContext(clCtx);
+        assert(err == CL_SUCCESS);
+        assert(data == 1);
+      }
+      {
+        cl::sycl::queue sycl_queue;
+        cl::sycl::program prog(sycl_queue.get_context());
+        prog.build_with_source("kernel void foo(global int* a, global int* b, "
+                               "global int* c) {*a=*b+*c; }\n");
+        int a = 13, b = 14, c = 15;
+        {
+          cl::sycl::buffer<int, 1> bufa(&a, cl::sycl::range<1>(1));
+          cl::sycl::buffer<int, 1> bufb(&b, cl::sycl::range<1>(1));
+          cl::sycl::buffer<int, 1> bufc(&c, cl::sycl::range<1>(1));
+          sycl_queue.submit([&](cl::sycl::handler &cgh) {
+            auto A = bufa.get_access<cl::sycl::access::mode::write>(cgh);
+            auto B = bufb.get_access<cl::sycl::access::mode::read>(cgh);
+            auto C = bufc.get_access<cl::sycl::access::mode::read>(cgh);
+            cgh.set_args(A, B, C);
+            cgh.single_task(prog.get_kernel("foo"));
+          });
+        }
+        assert(a == b + c);
+      }
+    }
+    {
+      cl::sycl::queue Queue;
+      if (!Queue.is_host()) {
+        cl::sycl::sampler first(
+            cl::sycl::coordinate_normalization_mode::normalized,
+            cl::sycl::addressing_mode::clamp, cl::sycl::filtering_mode::linear);
+        cl::sycl::sampler second(
+            cl::sycl::coordinate_normalization_mode::unnormalized,
+            cl::sycl::addressing_mode::clamp_to_edge,
+            cl::sycl::filtering_mode::nearest);
+        cl::sycl::program prog(Queue.get_context());
+        prog.build_with_source(
+            "kernel void sampler_args(int a, sampler_t first, "
+            "int b, sampler_t second, int c) {}\n");
+        cl::sycl::kernel krn = prog.get_kernel("sampler_args");
+
+        Queue.submit([&](cl::sycl::handler &cgh) {
+          cgh.set_args(0, first, 2, second, 3);
+          cgh.single_task(krn);
+        });
+      }
+    }
+  }
+  // Parallel for with range
+  {
+    cl::sycl::queue q;
+    std::vector<int> dataVec(10);
+    std::iota(dataVec.begin(), dataVec.end(), 0);
+
+    if (!q.is_host()) {
+      cl_int err;
+      {
+        cl::sycl::context ctx = q.get_context();
+        cl_context clCtx = ctx.get();
+        cl_command_queue clQ = q.get();
+        cl_mem clBuffer = clCreateBuffer(
+            clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL);
+        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0,
+                                   sizeof(int) * dataVec.size(), dataVec.data(),
+                                   0, NULL, NULL);
+        assert(err == CL_SUCCESS);
+
+        cl::sycl::program prog(ctx);
+        prog.build_with_source(
+            "kernel void ParallelFor(__global int* a, int v, __local int *l) "
+            "{ size_t index = get_global_id(0); l[index] = a[index];"
+            " l[index] += v; a[index] = l[index]; }\n");
+
+        q.submit([&](cl::sycl::handler &cgh) {
+          const int value = 1;
+          auto local_acc =
+              cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
+                                 cl::sycl::access::target::local>(
+                  cl::sycl::range<1>(10), cgh);
+          cgh.set_args(clBuffer, value, local_acc);
+          cgh.parallel_for(cl::sycl::range<1>(10),
+                           prog.get_kernel("ParallelFor"));
+        });
+
+        q.wait();
+        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0,
+                                  sizeof(int) * dataVec.size(), dataVec.data(),
+                                  0, NULL, NULL);
+        clReleaseCommandQueue(clQ);
+        clReleaseContext(clCtx);
+        assert(err == CL_SUCCESS);
+        for (size_t i = 0; i < dataVec.size(); ++i) {
+          assert(dataVec[i] == i + 1);
+        }
+      }
+    }
+  }
+
+  // Parallel for with nd_range
+  {
+    cl::sycl::queue q;
+    std::vector<int> dataVec(10);
+    std::iota(dataVec.begin(), dataVec.end(), 0);
+
+    if (!q.is_host()) {
+      cl_int err;
+      {
+        cl::sycl::context ctx = q.get_context();
+        cl_context clCtx = ctx.get();
+        cl_command_queue clQ = q.get();
+        cl_mem clBuffer = clCreateBuffer(
+            clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL);
+        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0,
+                                   sizeof(int) * dataVec.size(), dataVec.data(),
+                                   0, NULL, NULL);
+        assert(err == CL_SUCCESS);
+
+        cl::sycl::program prog(ctx);
+        prog.build_with_source(
+            "kernel void ParallelForND( local int* l,global int* a)"
+            "{  size_t idx = get_global_id(0);"
+            "  int pos = idx & 1;"
+            "  int opp = pos ^ 1;"
+            "  l[pos] = a[get_global_id(0)];"
+            "  barrier(CLK_LOCAL_MEM_FENCE);"
+            "  a[idx]=l[opp]; }");
+
+        // TODO is there no way to set local memory size via interoperability?
+        cl::sycl::kernel krn = prog.get_kernel("ParallelForND");
+        clSetKernelArg(krn.get(), 0, sizeof(int) * 2, NULL);
+
+        q.submit([&](cl::sycl::handler &cgh) {
+          cgh.set_arg(1, clBuffer);
+          cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(10),
+                                                 cl::sycl::range<1>(2)),
+                           krn);
+        });
+
+        q.wait();
+        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0,
+                                  sizeof(int) * dataVec.size(), dataVec.data(),
+                                  0, NULL, NULL);
+        clReleaseCommandQueue(clQ);
+        clReleaseContext(clCtx);
+        assert(err == CL_SUCCESS);
+      }
+      for (size_t i = 0; i < dataVec.size(); ++i) {
+        assert(dataVec[i] == (i ^ 1));
+      }
+    }
+  }
+}