Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit e02d78ba74efd21219b18f4083cf9afc14a16759
# Merge: e50a4ddc c12957bd
# Author: Omar Ahmed <[email protected]>
# Date: Fri Aug 9 15:41:55 2024 +0100
# Merge pull request #1933 from nrspruit/fix_driver_version_check
# [L0] Fix Driver Version check to use extension and tuple check
set(UNIFIED_RUNTIME_TAG e02d78ba74efd21219b18f4083cf9afc14a16759)
set(UNIFIED_RUNTIME_REPO "https://github.com/isaacault/unified-runtime.git")
# commit 6ded47d44cf650b7fc5d022f27b3414f6c18312b
# Author: Isaac Ault <[email protected]>
# Date: Wed Jul 24 08:57:49 2024 +0100
# [Bindless][Exp] Image Array Sub-Region Copies
# * Add support for sub-region copies.
set(UNIFIED_RUNTIME_TAG 6ded47d44cf650b7fc5d022f27b3414f6c18312b )

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ address mode `clamp_to_edge` will be applied for all dimensions. If the
performed when sampling along the cube face borders.
====

=== Explicit copies
=== Explicit copies [[explicit_copies]]

```cpp
namespace sycl {
Expand Down Expand Up @@ -1396,9 +1396,14 @@ As with allocation, the descriptor must be populated appropriately, i.e.

=== Copying image array data [[copying_image_array_data]]

When copying to or from image arrays, the user should copy to/from the entire
array of images in one call to `ext_oneapi_copy` by passing the image arrays'
`image_mem_handle`.
When copying to or from image arrays, the user should utilize `ext_oneapi_copy`
and pass the image arrays' `image_mem_handle`, and any applicable sub-region
copy parameters, as outlined in <<explicit_copies>>.

In order to copy to specific layers of an image array, the offset and extent
parameters involved in sub-region copies must be populated such that the 3rd
dimension of the ranges represent the arrays' layer(s) being copied, regardless
of whether the copy is performed on a 1D or 2D image array.

=== Reading an image array

Expand Down Expand Up @@ -2884,4 +2889,5 @@ These features still need to be handled:
handles and the imported `interop_xxx_handle`.
|5.17|2024-07-30| - Add support for mapping external memory to linear USM using
`map_external_linear_memory`.
|5.18|2024-08-05| - Update for image-array sub-region copy support.
|======================
35 changes: 26 additions & 9 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//

#include "ur_api.h"
#include "sycl/detail/helpers.hpp"
#include "ur_api.h"
#include <algorithm>

#include <detail/config.hpp>
Expand Down Expand Up @@ -1037,10 +1037,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1052,7 +1057,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand Down Expand Up @@ -1127,7 +1131,7 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
Desc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = Dest;

ur_image_desc_t UrDesc = {};
Expand All @@ -1147,10 +1151,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1162,7 +1171,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand All @@ -1180,8 +1188,8 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
ImageDesc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MDstPtr = reinterpret_cast<void*>(Dest.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = reinterpret_cast<void *>(Dest.raw_handle);

ur_image_desc_t UrDesc = {};
UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
Expand All @@ -1199,11 +1207,17 @@ void handler::ext_oneapi_copy(
ImageDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height,
ImageDesc.array_size};
} else {
UrDesc.type = ImageDesc.depth > 0
? UR_MEM_TYPE_IMAGE3D
: (ImageDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1215,7 +1229,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand All @@ -1235,7 +1248,7 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
SrcImgDesc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = Dest;

ur_image_desc_t UrDesc = {};
Expand Down Expand Up @@ -1311,10 +1324,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1326,7 +1344,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand Down
145 changes: 145 additions & 0 deletions sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// REQUIRES: cuda

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <iostream>
#include <sycl/detail/core.hpp>

#include <sycl/ext/oneapi/bindless_images.hpp>

// Uncomment to print additional test information
// #define VERBOSE_PRINT

class image_addition;

int main() {

sycl::device dev;
sycl::queue q(dev);
auto ctxt = q.get_context();

// declare image data
size_t width = 4;
size_t layers = 2;
size_t N = width * layers;
std::vector<float> out(N);
std::vector<float> expected(N);
std::vector<float> dataIn1(N);
std::vector<float> dataIn2(N);
for (int i = 0; i < width; i++) {
for (int j = 0; j < layers; j++) {
expected[j + ((layers)*i)] = (j + (layers)*i) * 3;
dataIn1[j + ((layers)*i)] = (j + (layers)*i);
dataIn2[j + ((layers)*i)] = (j + (layers)*i) * 2;
}
}

// Image descriptor - can use the same for both images
sycl::ext::oneapi::experimental::image_descriptor desc(
{width}, 1, sycl::image_channel_type::fp32,
sycl::ext::oneapi::experimental::image_type::array, 1, layers);

try {
// Extension: allocate memory on device and create the handle
sycl::ext::oneapi::experimental::image_mem imgMem0(desc, q);
sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q);
sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q);

// Extension: create the image and return the handle
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 =
sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q);
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 =
sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q);
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 =
sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q);

// The subregion size for the copies.
sycl::range copyExtent = {width / 2, 1, layers / 2};
// The extent of data provided on the host (vector).
sycl::range srcExtent = {width, 1, layers};

// the 4 subregion offsets used for the copies.
std::vector<sycl::range<3>> offsets{{0, 0, 0},
{width / 2, 0, 0},
{0, 0, layers / 2},
{width / 2, 0, layers / 2}};

for (auto offset : offsets) {
// Extension: Copy to image array subregion.
q.ext_oneapi_copy(dataIn1.data(), offset, srcExtent, imgMem0.get_handle(),
offset, desc, copyExtent);
// Extension: Copy to image array subregion.
q.ext_oneapi_copy(dataIn2.data(), offset, srcExtent, imgMem1.get_handle(),
offset, desc, copyExtent);
}
q.wait_and_throw();

q.submit([&](sycl::handler &cgh) {
cgh.parallel_for<image_addition>(
sycl::nd_range<2>{{width, layers}, {width, layers}},
[=](sycl::nd_item<2> it) {
size_t dim0 = it.get_local_id(0);
size_t dim1 = it.get_local_id(1);
float sum = 0;
// Extension: fetch image data from handle
float px1 =
sycl::ext::oneapi::experimental::fetch_image_array<float>(
imgHandle1, int(dim0), dim1);
float px2 =
sycl::ext::oneapi::experimental::fetch_image_array<float>(
imgHandle2, int(dim0), dim1);

sum = px1 + px2;

// Extension: write to image with handle
sycl::ext::oneapi::experimental::write_image_array<float>(
imgHandle3, int(dim0), dim1, sum);
});
});
q.wait_and_throw();

// Extension: copy data from device to host (four subregions/quadrants)
for (auto offset : offsets) {
q.ext_oneapi_copy(imgMem2.get_handle(), offset, desc, out.data(), offset,
srcExtent, copyExtent);
}
q.wait_and_throw();

// Extension: cleanup
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q);
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q);
} catch (sycl::exception e) {
std::cerr << "SYCL exception caught! : " << e.what() << "\n";
return 1;
} catch (...) {
std::cerr << "Unknown exception caught!\n";
return 2;
}

// collect and validate output
bool validated = true;
for (int i = 0; i < N; i++) {
bool mismatch = false;
if (out[i] != expected[i]) {
mismatch = true;
validated = false;
}

if (mismatch) {
#ifdef VERBOSE_PRINT
std::cout << "Result mismatch! Expected: " << expected[i]
<< ", Actual: " << out[i] << std::endl;
#else
break;
#endif
}
}
if (validated) {
std::cout << "Test passed!" << std::endl;
return 0;
}

std::cout << "Test failed!" << std::endl;
return 3;
}
Loading