Skip to content

Commit

Permalink
Add generated kernel name header instead of relying on kernel name qu…
Browse files Browse the repository at this point in the history
…ery.
  • Loading branch information
aarongreig committed May 30, 2023
1 parent 16138ee commit aef17a2
Show file tree
Hide file tree
Showing 15 changed files with 250 additions and 91 deletions.
108 changes: 108 additions & 0 deletions scripts/generate_kernel_header.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""
Copyright (C) 2022 Intel Corporation
SPDX-License-Identifier: MIT
"""
import argparse
import os
import re
import subprocess
import sys

from mako.template import Template

HEADER_TEMPLATE = Template("""/*
*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
* @file ${file_name}.h
*
*/
#include <map>
#include <string>
#include <vector>
namespace uur {
namespace device_binaries {
std::map<std::string, std::vector<std::string>> program_kernel_map = {
% for program, entry_points in kernel_name_dict.items():
{"${program}", {
% for entry_point in entry_points:
"${entry_point}",
% endfor
}},
% endfor
};
}
}
""")


def generate_header(output_file, kernel_name_dict):
"""Render the template and write it to the output file."""
file_name = os.path.basename(output_file)
rendered = HEADER_TEMPLATE.render(file_name=file_name,
kernel_name_dict=kernel_name_dict)
rendered = re.sub(r"\r\n", r"\n", rendered)

with open(output_file, "w") as fout:
fout.write(rendered)


def get_mangled_names(dpcxx_path, source_file, output_header):
"""Return a list of all the entry point names from a given sycl source file.
Filters out wrapper and offset handler entry points.
"""
output_dir = os.path.dirname(output_header)
il_file = os.path.join(output_dir, os.path.basename(source_file) + ".ll")
generate_il_command = f"""\
{dpcxx_path} -S -fsycl -fsycl-device-code-split=off \
-fsycl-device-only -o {il_file} {source_file}"""
subprocess.run(generate_il_command, shell=True)
kernel_line_regex = re.compile("define.*spir_kernel")
definition_lines = []
with open(il_file) as f:
lines = f.readlines()
for line in lines:
if kernel_line_regex.search(line) is not None:
definition_lines.append(line)

entry_point_names = []
kernel_name_regex = re.compile(r"@(.*?)\(")
for line in definition_lines:
if kernel_name_regex.search(line) is None:
continue
kernel_name = kernel_name_regex.search(line).group(1)
if "kernel_wrapper" not in kernel_name and "with_offset" not in kernel_name:
entry_point_names.append(kernel_name)

os.remove(il_file)
return entry_point_names


def main():
parser = argparse.ArgumentParser()
parser.add_argument("--dpcxx_path",
help="Full path to dpc++ compiler executable.")
parser.add_argument(
"-o",
"--output",
help="Full path to header file that will be generated.")
parser.add_argument("source_files", nargs="+")
args = parser.parse_args()

mangled_names = {}

for source_file in args.source_files:
program_name = os.path.splitext(os.path.basename(source_file))[0]
mangled_names[program_name] = get_mangled_names(
args.dpcxx_path, source_file, args.output)
generate_header(args.output, mangled_names)


if __name__ == "__main__":
sys.exit(main())
1 change: 1 addition & 0 deletions test/conformance/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ function(add_conformance_test_with_kernels_environment name)
add_conformance_test(${name} ${ARGN})
target_compile_definitions("test-${name}" PRIVATE KERNELS_ENVIRONMENT PRIVATE
KERNELS_DEFAULT_DIR="${UR_CONFORMANCE_DEVICE_BINARIES_DIR}")
target_include_directories("test-${name}" PRIVATE ${UR_CONFORMANCE_DEVICE_BINARIES_DIR})
add_dependencies("test-${name}" generate_device_binaries)
endfunction()

Expand Down
31 changes: 22 additions & 9 deletions test/conformance/device_code/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,29 +1,42 @@
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: MIT

function(add_device_binary SOURCE_FILE)
macro(add_device_binary SOURCE_FILE)
get_filename_component(KERNEL_NAME ${SOURCE_FILE} NAME_WE)
set(DEVICE_BINARY_DIR "${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/${KERNEL_NAME}")
file(MAKE_DIRECTORY ${DEVICE_BINARY_DIR})
foreach(TRIPLE ${TARGET_TRIPLES})
add_custom_target(${KERNEL_NAME}_${TRIPLE}_device_binary
${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
${SOURCE_FILE} -o "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}"
set(EXE_PATH "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}")
add_custom_command(OUTPUT ${EXE_PATH}
COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
${SOURCE_FILE} -o ${EXE_PATH}
COMMAND ${CMAKE_COMMAND} -E env SYCL_DUMP_IMAGES=true
"${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}" || (exit 0)
${EXE_PATH} || (exit 0)
WORKING_DIRECTORY "${DEVICE_BINARY_DIR}"
COMMAND ${CMAKE_COMMAND} -E remove
"${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}"
DEPENDS ${SOURCE_FILE}
)
add_dependencies(generate_device_binaries ${KERNEL_NAME}_${TRIPLE}_device_binary)
add_custom_target(generate_${KERNEL_NAME}_${TRIPLE} DEPENDS ${EXE_PATH})
add_dependencies(generate_device_binaries generate_${KERNEL_NAME}_${TRIPLE})
endforeach()
endfunction()
list(APPEND DEVICE_CODE_SOURCES ${SOURCE_FILE})
endmacro()

add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/bar.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_2d.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_3d.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_usm.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp)

set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h)
add_custom_command(OUTPUT ${KERNEL_HEADER}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/scripts
COMMAND ${Python3_EXECUTABLE} generate_kernel_header.py
--dpcxx_path ${UR_DPCXX} -o ${KERNEL_HEADER} ${DEVICE_CODE_SOURCES}
DEPENDS ${PROJECT_SOURCE_DIR}/scripts/generate_kernel_header.py
${DEVICE_CODE_SOURCES})
add_custom_target(kernel_names_header DEPENDS ${KERNEL_HEADER})
add_dependencies(generate_device_binaries kernel_names_header)
3 changes: 2 additions & 1 deletion test/conformance/device_code/fill_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ int main() {
uint32_t val = 42;
cl::sycl::queue sycl_queue;

auto work_range = cl::sycl::nd_range<3>(nd_range, cl::sycl::range<3>(1, 1, 1));
auto work_range =
cl::sycl::nd_range<3>(nd_range, cl::sycl::range<3>(1, 1, 1));
auto A_buff = cl::sycl::buffer<uint32_t>(
A.data(), cl::sycl::range<1>(nd_range_x * nd_range_y));
sycl_queue.submit([&](cl::sycl::handler &cgh) {
Expand Down
20 changes: 20 additions & 0 deletions test/conformance/device_code/fill_usm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: MIT

#include <CL/sycl.hpp>

int main() {
size_t array_size = 16;
std::vector<uint32_t> A(array_size, 1);
uint32_t val = 42;
cl::sycl::queue sycl_queue;
uint32_t *data = cl::sycl::malloc_shared<uint32_t>(array_size, sycl_queue);
sycl_queue.submit([&](cl::sycl::handler &cgh) {
cgh.parallel_for<class fill_usm>(cl::sycl::range<1>{array_size},
[data, val](cl::sycl::item<1> itemId) {
auto id = itemId.get_id(0);
data[id] = val;
});
});
return 0;
}
12 changes: 6 additions & 6 deletions test/conformance/enqueue/urEnqueueKernelLaunch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidNullPtrEventWaitList) {
nullptr, 1, nullptr, nullptr),
UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);

// does this make sense??
ur_event_handle_t validEvent;
ASSERT_SUCCESS(urEnqueueEventsWait(queue, 0, nullptr, &validEvent));

Expand Down Expand Up @@ -88,8 +87,8 @@ TEST_P(urEnqueueKernelLaunch2DTest, Success) {
AddBuffer1DArg(buffer_size, &buffer);
AddPodArg(val);
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
global_offset, global_size, nullptr,
0, nullptr, nullptr));
global_offset, global_size, nullptr, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));
ValidateBuffer(buffer, buffer_size, val);
}
Expand All @@ -103,7 +102,8 @@ struct urEnqueueKernelLaunch3DTest : uur::urKernelExecutionTest {
uint32_t val = 42;
size_t global_size[3] = {4, 4, 4};
size_t global_offset[3] = {0, 0, 0};
size_t buffer_size = sizeof(val) * global_size[0] * global_size[1] * global_size[2];
size_t buffer_size =
sizeof(val) * global_size[0] * global_size[1] * global_size[2];
size_t n_dimensions = 3;
};
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunch3DTest);
Expand All @@ -113,8 +113,8 @@ TEST_P(urEnqueueKernelLaunch3DTest, Success) {
AddBuffer1DArg(buffer_size, &buffer);
AddPodArg(val);
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
global_offset, global_size, nullptr,
0, nullptr, nullptr));
global_offset, global_size, nullptr, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));
ValidateBuffer(buffer, buffer_size, val);
}
13 changes: 4 additions & 9 deletions test/conformance/kernel/urKernelCreate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,10 @@ struct urKernelCreateTest : uur::urProgramTest {
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp());
ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
size_t kernel_string_size = 0;
ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_KERNEL_NAMES,
0, nullptr, &kernel_string_size));
std::string kernel_string;
kernel_string.resize(kernel_string_size);
ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_KERNEL_NAMES,
kernel_string.size(),
kernel_string.data(), nullptr));
kernel_name = kernel_string.substr(0, kernel_string.find(";"));
auto kernel_names =
uur::KernelsEnvironment::instance->GetEntryPointNames(
this->program_name);
kernel_name = kernel_names[0];
}

void TearDown() override {
Expand Down
60 changes: 43 additions & 17 deletions test/conformance/kernel/urKernelSetArgPointer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

#include <uur/fixtures.h>

struct urKernelSetArgPointerTest : uur::urKernelTest {
struct urKernelSetArgPointerTest : uur::urKernelExecutionTest {
void SetUp() {
program_name = "fill";
UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp());
program_name = "fill_usm";
UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp());
}

void TearDown() {
Expand All @@ -16,15 +16,23 @@ struct urKernelSetArgPointerTest : uur::urKernelTest {
UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::TearDown());
}

void ValidateAllocation() {
for (size_t i = 0; i < array_size; i++) {
ASSERT_EQ(static_cast<uint32_t *>(allocation)[i], data);
}
}

void *allocation = nullptr;
size_t allocation_size = 16 * sizeof(uint32_t);
size_t array_size = 16;
size_t allocation_size = array_size * sizeof(uint32_t);
uint32_t data = 42;
};
UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urKernelSetArgPointerTest);

TEST_P(urKernelSetArgPointerTest, SuccessHost) {
ur_device_usm_access_capability_flags_t host_supported = false;
ASSERT_SUCCESS(uur::GetDeviceUSMHostSupport(device, host_supported));
if (!host_supported) {
ur_device_usm_access_capability_flags_t host_usm_flags = 0;
ASSERT_SUCCESS(uur::GetDeviceUSMHostSupport(device, host_usm_flags));
if (!(host_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
GTEST_SKIP() << "Host USM is not supported.";
}

Expand All @@ -33,27 +41,39 @@ TEST_P(urKernelSetArgPointerTest, SuccessHost) {
ASSERT_NE(allocation, nullptr);

ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, allocation));
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(data), &data));
ASSERT_SUCCESS(urEnqueueUSMPrefetch(queue, allocation, allocation_size,
UR_USM_MIGRATION_FLAG_DEFAULT, 0,
nullptr, nullptr));
Launch1DRange(array_size);
ValidateAllocation();
}

TEST_P(urKernelSetArgPointerTest, SuccessDevice) {
ur_device_usm_access_capability_flags_t device_supported = false;
ASSERT_SUCCESS(uur::GetDeviceUSMDeviceSupport(device, device_supported));
if (!device_supported) {
GTEST_SKIP() << "Host USM is not supported.";
ur_device_usm_access_capability_flags_t device_usm_flags = 0;
ASSERT_SUCCESS(uur::GetDeviceUSMDeviceSupport(device, device_usm_flags));
if (!(device_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
GTEST_SKIP() << "Device USM is not supported.";
}

ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
allocation_size, &allocation));
ASSERT_NE(allocation, nullptr);

ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, allocation));
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(data), &data));
ASSERT_SUCCESS(urEnqueueUSMPrefetch(queue, allocation, allocation_size,
UR_USM_MIGRATION_FLAG_DEFAULT, 0,
nullptr, nullptr));
Launch1DRange(array_size);
ValidateAllocation();
}

TEST_P(urKernelSetArgPointerTest, SuccessShared) {
ur_device_usm_access_capability_flags_t shared_supported = false;
ur_device_usm_access_capability_flags_t shared_usm_flags = 0;
ASSERT_SUCCESS(
uur::GetDeviceUSMSingleSharedSupport(device, shared_supported));
if (!shared_supported) {
uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
GTEST_SKIP() << "Shared USM is not supported.";
}

Expand All @@ -62,20 +82,26 @@ TEST_P(urKernelSetArgPointerTest, SuccessShared) {
ASSERT_NE(allocation, nullptr);

ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, allocation));
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(data), &data));
ASSERT_SUCCESS(urEnqueueUSMPrefetch(queue, allocation, allocation_size,
UR_USM_MIGRATION_FLAG_DEFAULT, 0,
nullptr, nullptr));
Launch1DRange(array_size);
ValidateAllocation();
}

struct urKernelSetArgPointerNegativeTest : urKernelSetArgPointerTest {
// Get any valid allocation we can to test validation of the other parameters.
void SetUpAllocation() {
ur_device_usm_access_capability_flags_t host_supported = false;
ur_device_usm_access_capability_flags_t host_supported = 0;
ASSERT_SUCCESS(uur::GetDeviceUSMHostSupport(device, host_supported));
if (host_supported) {
ASSERT_SUCCESS(urUSMHostAlloc(context, nullptr, nullptr,
allocation_size, &allocation));
return;
}

ur_device_usm_access_capability_flags_t device_supported = false;
ur_device_usm_access_capability_flags_t device_supported = 0;
ASSERT_SUCCESS(
uur::GetDeviceUSMDeviceSupport(device, device_supported));
if (device_supported) {
Expand All @@ -84,7 +110,7 @@ struct urKernelSetArgPointerNegativeTest : urKernelSetArgPointerTest {
return;
}

ur_device_usm_access_capability_flags_t shared_supported = false;
ur_device_usm_access_capability_flags_t shared_supported = 0;
ASSERT_SUCCESS(
uur::GetDeviceUSMSingleSharedSupport(device, shared_supported));
if (shared_supported) {
Expand Down
Loading

0 comments on commit aef17a2

Please sign in to comment.