Skip to content

Commit ae02405

Browse files
authored
Merge pull request #81 from unisa-hpc/sycl2020
Update SYCL-Bench to SYCL 2020
2 parents db1c70c + f60695f commit ae02405

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+5170
-4163
lines changed

Diff for: .clang-tidy

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
---
2+
#
3+
# Clang-Tidy configuration for SYCL-Bench.
4+
#
5+
# There are three usage scenarios:
6+
# 1. Automatic checks through an IDE (CLion, VsCode, ...)
7+
# 2. Running manually on select files (not recommended)
8+
# `clang-tidy -p path/to/compile_commands.json file1 [file2, ...]`
9+
# Note: A script for running clang-tidy on all Celerity sources is provided in `ci/run-clang-tidy.sh`
10+
# 3. Running on a diff (for CI)
11+
# `git diff -U0 --no-color | clang-tidy-diff.py -p1 -path path/to/compile_commands.json`
12+
#
13+
InheritParentConfig: false
14+
# See https://clang.llvm.org/extra/clang-tidy/checks/list.html for a full list of available checks.
15+
Checks: -*,
16+
readability-*,
17+
-readability-avoid-const-params-in-decls,
18+
-readability-function-cognitive-complexity,
19+
-readability-identifier-length,
20+
-readability-magic-numbers,
21+
-readability-uppercase-literal-suffix,
22+
-readability-convert-member-functions-to-static,
23+
-readability-qualified-auto
24+
25+
# Treat naming violations as errors
26+
WarningsAsErrors: "readability-identifier-naming"
27+
# Use .clang-format configuration for fixes
28+
FormatStyle: file

Diff for: .gitignore

+8
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,10 @@
11
/build*
2+
*.csv
3+
img/
24

5+
# Clangd
6+
.cache/
7+
.clangd
8+
9+
# Vscode
10+
.vscode/

Diff for: CITATION.cff

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
cff-version: 1.2.0
2+
message: "If you use this software, please cite it as below."
3+
conference-paper: "Proceedings of the 12th International Workshop on OpenCL and SYCL (IWOCL 24)"
4+
authors:
5+
- family-names: "Crisci"
6+
given-names: "Luigi"
7+
- family-names: "Carpentieri"
8+
given-names: "Lorenzo"
9+
- family-names: "Thoman"
10+
given-names: "Peter"
11+
- family-names: "Alpay"
12+
given-names: "Aksel"
13+
- family-names: "Heuveline"
14+
given-names: "Vincent"
15+
- family-names: "Cosenza"
16+
given-names: "Biagio"
17+
title: "SYCL-Bench 2020: Benchmarking SYCL 2020 on AMD, Intel, and NVIDIA GPUs"
18+
version: 2.0.4
19+
doi: 10.1145/3648115.3648120
20+
date-released: 2024-04-08
21+
url: "https://github.com/unisa-hpc/sycl-bench/"

Diff for: CMakeLists.txt

+89-44
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,31 @@
1-
cmake_minimum_required (VERSION 3.5)
1+
cmake_minimum_required(VERSION 3.5)
22
project(sycl-bench)
33

44
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake)
55
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
66

77
if(NOT CMAKE_BUILD_TYPE)
8-
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake Build Type" FORCE)
8+
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake Build Type" FORCE)
99
endif()
1010

1111
set(CMAKE_CXX_STANDARD 17)
1212
set(CMAKE_CXX_STANDARD_REQUIRED ON)
1313
set(CMAKE_CXX_EXTENSIONS OFF)
1414

15-
# Due to CMake limitations, hipSYCL requires C++ standard to be set manually
16-
set(CMAKE_SYCL_FLAGS "${CMAKE_SYCL_FLAGS} -std=c++17")
15+
# Default build flags
16+
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG -fno-omit-frame-pointer" CACHE STRING "Flags used by the C++ compiler during debug builds." FORCE)
17+
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -march=native -ffast-math" CACHE STRING "Flags used by the C++ compiler during release builds." FORCE)
18+
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -DNDEBUG -march=native -ffast-math -g -fno-omit-frame-pointer" CACHE STRING "Flags used by the C++ compiler during release builds with debug info." FORCE)
19+
1720

1821
if(CMAKE_GENERATOR STREQUAL "Ninja")
19-
set(CMAKE_SYCL_FLAGS "${CMAKE_SYCL_FLAGS} -fdiagnostics-color=always")
20-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
21-
set(COMPUTECPP_USER_FLAGS "${COMPUTECPP_USER_FLAGS} -fdiagnostics-color=always")
22+
set(CMAKE_SYCL_FLAGS "${CMAKE_SYCL_FLAGS} -fdiagnostics-color=always")
23+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
24+
set(COMPUTECPP_USER_FLAGS "${COMPUTECPP_USER_FLAGS} -fdiagnostics-color=always")
25+
endif()
26+
27+
if(SYCL_BENCH_ENABLE_QUEUE_PROFILING)
28+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSYCL_BENCH_ENABLE_QUEUE_PROFILING")
2229
endif()
2330

2431
include(InstallRequiredSystemLibraries)
@@ -28,31 +35,55 @@ include_directories(${CMAKE_SOURCE_DIR}/include)
2835
include_directories(${CMAKE_SOURCE_DIR}/polybench/common)
2936

3037
set(supported_implementations
31-
ComputeCpp
32-
hipSYCL
33-
LLVM
34-
LLVM-CUDA
35-
triSYCL
38+
AdaptiveCpp
39+
dpcpp
40+
triSYCL
3641
)
3742

3843
# Look up the requested implementation in the supported list.
# SYCL_IMPL is quoted so that an unset or empty value still passes three
# arguments to list(FIND); impl_idx is then -1 and the check that follows can
# print its "Please specify SYCL_IMPL" message instead of CMake aborting here.
list(FIND supported_implementations "${SYCL_IMPL}" impl_idx)
44+
3945
if(NOT SYCL_IMPL OR impl_idx EQUAL -1)
40-
message(FATAL_ERROR "Please specify SYCL_IMPL (one of: ${supported_implementations})")
46+
message(FATAL_ERROR "Please specify SYCL_IMPL (one of: ${supported_implementations})")
4147
endif()
4248

43-
if(SYCL_IMPL STREQUAL "ComputeCpp")
44-
find_package(ComputeCpp MODULE REQUIRED)
45-
elseif(SYCL_IMPL STREQUAL "hipSYCL")
46-
find_package(hipSYCL CONFIG REQUIRED)
47-
elseif(SYCL_IMPL STREQUAL "LLVM")
48-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl")
49-
elseif(SYCL_IMPL STREQUAL "LLVM-CUDA")
50-
set(CMAKE_CXX_FLAGS
51-
"${CMAKE_CXX_FLAGS} -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice")
49+
if(SYCL_IMPL STREQUAL "AdaptiveCpp")
50+
find_package(AdaptiveCpp REQUIRED)
51+
elseif(SYCL_IMPL STREQUAL "dpcpp")
52+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl")
53+
54+
if(DPCPP_WITH_CUDA_BACKEND)
55+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
56+
57+
set(CUDA_ARCH "" CACHE STRING "CUDA device architecture e.g. sm_70")
58+
59+
if(NOT CUDA_ARCH STREQUAL "")
60+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --cuda-gpu-arch=${CUDA_ARCH}")
61+
endif()
62+
endif()
63+
64+
if(DPCPP_WITH_ROCM_BACKEND)
65+
set(ROCM_ARCH "" CACHE STRING "ROCm device architecture e.g. gfx908")
66+
67+
if(NOT ROCM_ARCH STREQUAL "")
68+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amd_gpu_${ROCM_ARCH}")
69+
endif()
70+
endif()
71+
72+
if(DPCPP_WITH_LZ_BACKEND)
73+
set(LZ_ARCH "" CACHE STRING "Level Zero device architecture e.g. acm-g10")
74+
75+
if(NOT LZ_ARCH STREQUAL "")
76+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=${LZ_ARCH}")
77+
endif()
78+
endif()
79+
5280
elseif(SYCL_IMPL STREQUAL "triSYCL")
53-
find_package(TriSYCL MODULE REQUIRED)
81+
find_package(TriSYCL MODULE REQUIRED)
5482
endif()
5583

84+
# Check if SYCL implementation implements the required SYCL features
85+
include(HasFeatures)
86+
5687
set(benchmarks
5788
micro/arith.cpp
5889
micro/DRAM.cpp
@@ -92,42 +123,56 @@ set(benchmarks
92123
polybench/mvt.cpp
93124
polybench/syr2k.cpp
94125
polybench/syrk.cpp
95-
#compiletime/compiletime.cpp
96-
)
97126

98-
foreach(benchmark IN LISTS benchmarks)
99-
get_filename_component(target ${benchmark} NAME_WE)
127+
# compiletime/compiletime.cpp
128+
sycl2020/atomics/atomic_reduction.cpp
129+
sycl2020/USM/usm_accessors_latency.cpp
130+
sycl2020/USM/usm_instr_mix.cpp
131+
sycl2020/USM/usm_pinned_overhead.cpp
132+
sycl2020/USM/usm_allocation_latency.cpp
133+
)
134+
# Selectively add benchmarks based on some SYCL 2020 features
135+
if (SYCL_BENCH_HAS_SPEC_CONSTANTS)
136+
list(APPEND benchmarks sycl2020/spec_constants/spec_constant_convolution.cpp)
137+
endif()
138+
if (SYCL_BENCH_HAS_KERNEL_REDUCTIONS)
139+
list(APPEND benchmarks sycl2020/kernel_reduction/kernel_reduction.cpp)
140+
endif()
141+
if (SYCL_BENCH_HAS_GROUP_ALGORITHMS)
142+
list(APPEND benchmarks sycl2020/group_algorithms/reduce_over_group.cpp)
143+
endif()
100144

101-
add_executable(${target} ${benchmark})
145+
# Setting variables
146+
add_compile_definitions(SYCL_BENCH_HAS_FP64_SUPPORT=$<BOOL:${SYCL_BENCH_HAS_FP64_SUPPORT}>)
102147

103-
if(SYCL_IMPL STREQUAL "ComputeCpp" OR SYCL_IMPL STREQUAL "hipSYCL")
104-
add_sycl_to_target(TARGET ${target} SOURCES ${benchmark})
105-
endif()
148+
foreach(benchmark IN LISTS benchmarks)
149+
get_filename_component(target ${benchmark} NAME_WE)
106150

107-
if(SYCL_IMPL STREQUAL "ComputeCpp" AND COMPUTECPP_BITCODE STREQUAL "ptx64")
108-
target_compile_definitions(${target} PRIVATE SYCL_BENCH_ENABLE_QUEUE_PROFILING)
109-
endif()
151+
add_executable(${target} ${benchmark})
110152

111-
if(SYCL_IMPL STREQUAL "LLVM")
112-
target_compile_definitions(${target} PRIVATE __LLVM_SYCL__)
113-
endif()
153+
if(SYCL_IMPL STREQUAL "AdaptiveCpp")
154+
add_sycl_to_target(TARGET ${target} SOURCES ${benchmark})
155+
endif()
114156

115-
if(SYCL_IMPL STREQUAL "LLVM-CUDA")
116-
target_compile_definitions(${target} PRIVATE __LLVM_SYCL_CUDA__)
117-
endif()
157+
if(SYCL_IMPL STREQUAL "dpcpp")
158+
target_compile_definitions(${target} PRIVATE __DPCPP__)
159+
endif()
118160

119161
if(SYCL_IMPL STREQUAL "triSYCL")
120162
add_sycl_to_target(${target})
121163
target_compile_definitions(${target} PRIVATE __TRISYCL__)
122164
endif()
123-
165+
166+
if(ENABLE_TIME_EVENT_PROFILING)
167+
target_compile_definitions(${target} PUBLIC SYCL_BENCH_ENABLE_QUEUE_PROFILING=1)
168+
endif()
169+
124170
install(TARGETS ${target} RUNTIME DESTINATION bin/benchmarks/)
125171
get_filename_component(dir ${benchmark} DIRECTORY)
126172
set_property(TARGET ${target} PROPERTY FOLDER ${dir})
127173
endforeach(benchmark)
128174

129175
# The "compiletime" target should only be used in the context of the compile time evaluation script
130-
#set_target_properties(compiletime PROPERTIES EXCLUDE_FROM_ALL 1)
131-
176+
# set_target_properties(compiletime PROPERTIES EXCLUDE_FROM_ALL 1)
132177
install(PROGRAMS bin/run-suite DESTINATION bin/)
133-
install(FILES ${PROJECT_SOURCE_DIR}/Brommy.bmp DESTINATION share/)
178+
install(FILES ${PROJECT_SOURCE_DIR}/share/Brommy.bmp DESTINATION share/)

Diff for: README.md

+19-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ $ mkdir build && cd build
2727

2828
Compile with CMake
2929
```
30-
$ cmake -DSYCL_IMPL=[target SYCL implementation] [other compiler arguments] ..
30+
$ cmake -DSYCL_IMPL=[target SYCL implementation] [-DSYCL_BENCH_HAS_FP64_SUPPORT=ON|OFF] [other compiler arguments] ..
3131
$ cmake --build .
3232
$ sudo make install
3333
```
@@ -57,6 +57,24 @@ Packages built via the `package` target will contain all files contained in a SY
5757

5858
## Attribution
5959
If you use SYCL-Bench, please cite the following papers:
60+
```
61+
@inproceedings{SYCL-Bench:IWOCL:2024,
62+
author = {Crisci, Luigi and Carpentieri, Lorenzo and Thoman, Peter and Alpay, Aksel and Heuveline, Vincent and Cosenza, Biagio},
63+
title = {SYCL-Bench 2020: Benchmarking SYCL 2020 on AMD, Intel, and NVIDIA GPUs},
64+
year = {2024},
65+
isbn = {9798400717901},
66+
publisher = {Association for Computing Machinery},
67+
address = {New York, NY, USA},
68+
url = {https://doi.org/10.1145/3648115.3648120},
69+
doi = {10.1145/3648115.3648120},
70+
booktitle = {Proceedings of the 12th International Workshop on OpenCL and SYCL},
71+
articleno = {1},
72+
numpages = {12},
73+
keywords = {GPU, HPC, SYCL, benchmark, heterogeneous computing, portability},
74+
location = {Chicago, IL, USA},
75+
series = {IWOCL '24}
76+
}
77+
```
6078

6179
```
6280
@inproceedings{SYCL-Bench:Euro-Par:2020,

Diff for: bin/run-suite

+32-4
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,36 @@ default_profile = {
108108
},
109109
'mvt' : {
110110
'--size' : create_log_range(2**14, 2**14)
111-
},
112-
},
113-
'individual-benchmark-flags' : set([])
111+
},
112+
'usm_accessors_latency' : {
113+
'--size' : create_log_range(2**20, 2**20)
114+
},
115+
'usm_allocation_latency' : {
116+
'--size' : create_log_range(2**25, 2**25)
117+
},
118+
'usm_instr_mix' : {
119+
'--size' : create_log_range(2**14, 2**14)
120+
},
121+
'usm_pinned_overhead' : {
122+
'--size' : create_log_range(2**20, 2**20)
123+
},
124+
'spec_constant_convolution' : {
125+
'--size' : create_log_range(2**11, 2**11)
126+
},
127+
'atomic_reduction' : {
128+
'--size' : create_log_range(2**20, 2**20)
129+
},
130+
'reduce_over_group' : {
131+
'--size' : create_log_range(2**20, 2**20)
132+
},
133+
'kernel_reduction' : {
134+
'--size' : create_log_range(2**20, 2**20)
135+
}
136+
},
137+
'individual-benchmark-flags' : {
138+
'usm_instr_mix' : ['--instr-mix=6'],
139+
'usm_pinned_overhead' : ['--num-copies=5'],
140+
}
114141
}
115142

116143
def construct_profile(overridden_options_dict,
@@ -222,8 +249,9 @@ if __name__ == '__main__':
222249
if benchmark_name in individual_benchmark_options:
223250
for param in individual_benchmark_options[benchmark_name]:
224251
options[param] = individual_benchmark_options[benchmark_name][param]
252+
225253
if benchmark_name in individual_benchmark_flags:
226-
for f in individual_benchmark_flags:
254+
for f in individual_benchmark_flags[benchmark_name]:
227255
flags.add(f)
228256

229257
max_runtime = 0.0

Diff for: cmake/HasFeatures.cmake

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# check_feature(<VAR> <FILENAME>)
#
# Probes a SYCL feature by compiling and running
# cmake/has-features/src/<FILENAME> with try_run().
#
#   VAR      - feature suffix; the outcome is published as SYCL_BENCH_HAS_<VAR>
#   FILENAME - probe source file inside cmake/has-features/src/
#
# The probe counts as successful only when it compiles and exits with status 0.
# try_run() is skipped when RUN_RES_<VAR> is already defined.
# A SYCL_BENCH_HAS_<VAR> value that is already defined (e.g. passed with -D on
# the command line) is left untouched, so explicit user choices win over
# detection.
#
# This is a macro, so RES and SYCL_BENCH_HAS_<VAR> are set in the caller's scope.
macro(check_feature VAR FILENAME)
  if(NOT DEFINED RUN_RES_${VAR})
    # NOTE(review): CMAKE_FLAGS is documented for -DVAR=VALUE style arguments;
    # passing the compiler flag string here is kept as-is - confirm it is needed.
    try_run(RUN_RES_${VAR} COMPILE_RES_${VAR} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/has-features/src/${FILENAME}
      CMAKE_FLAGS ${CMAKE_CXX_FLAGS}
      COMPILE_OUTPUT_VARIABLE OUTPUT_VAR
      RUN_OUTPUT_VARIABLE RUN_VAR
    )
  endif()

  if (COMPILE_RES_${VAR} AND RUN_RES_${VAR} EQUAL 0)
    set(RES ON)
  else()
    set(RES OFF)
  endif()

  # Publish the detection result under the name the build scripts test;
  # without this the probe outcome is only printed and then discarded.
  if(NOT DEFINED SYCL_BENCH_HAS_${VAR})
    set(SYCL_BENCH_HAS_${VAR} ${RES})
  endif()

  message(STATUS "${VAR}: ${RES}")
endmacro()
17+
18+
message(STATUS "Checking for SYCL features....")
19+
check_feature(KERNEL_REDUCTIONS kernel_reduction_dummy.cpp)
20+
check_feature(SPEC_CONSTANTS spec_constants_dummy.cpp)
21+
check_feature(GROUP_ALGORITHMS group_algorithms_dummy.cpp)
22+
check_feature(FP64_SUPPORT fp64_support_dummy.cpp)

Diff for: cmake/has-features/src/fp64_support_dummy.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#include <sycl/sycl.hpp>

// Configure-time feature probe for double-precision support.
// Exit status 0 means a kernel writing a double ran and the value read back
// correctly; any other status (or a crash) means the feature is unavailable.
int main() {
  sycl::queue q;

  // Report "unsupported" cleanly when the selected device lacks fp64,
  // instead of relying on the kernel submission to fail.
  if(!q.get_device().has(sycl::aspect::fp64)) {
    return 1;
  }

  sycl::buffer<double> x(1);

  q.submit([&](sycl::handler& cgh) {
    sycl::accessor a(x, cgh, sycl::read_write);
    cgh.parallel_for<class dummy>(sycl::range<1>(1), [=](sycl::id<1> idx) { a[idx] = 0; });
  });

  // The outcome is signalled through the exit status rather than assert():
  // assert() is compiled out under NDEBUG and was used without <cassert>.
  sycl::host_accessor host{x};
  return host[0] == 0 ? 0 : 1;
}

Diff for: cmake/has-features/src/group_algorithms_dummy.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include <sycl/sycl.hpp>
#include <iostream>

// Configure-time feature probe for the group algorithms used in SYCL-Bench.
// Exit status 0 means reduce_over_group compiled, ran and produced the
// expected value; any other status (or a crash) means it is unavailable.
int main() {
  sycl::queue q;
  int* i = sycl::malloc_shared<int>(1, q);

  // A failed shared allocation means the probe cannot run at all.
  if(i == nullptr) {
    return 1;
  }

  q.submit([&](sycl::handler& cgh) {
     cgh.parallel_for(sycl::nd_range<1>{{1}, {1}}, [=](sycl::nd_item<1> item) {
       // call only the group algorithms used in SYCL-Bench
       *i = sycl::reduce_over_group(item.get_group(), 1, sycl::plus<int>{});
     });
   }).wait();

  // Read the result before releasing the allocation, then report it through
  // the exit status: assert() is compiled out under NDEBUG and was used
  // without <cassert>.
  const bool ok = (*i == 1);
  sycl::free(i, q);
  return ok ? 0 : 1;
}

0 commit comments

Comments
 (0)