-
Notifications
You must be signed in to change notification settings - Fork 814
[SYCL][New offload model] Add SYCL E2E tests for --offload-new-driver option and fix failing tests #14730
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SYCL][New offload model] Add SYCL E2E tests for --offload-new-driver option and fix failing tests #14730
Changes from all commits
7c1b9ca
3cb7a33
88f964d
f053bd5
b02cfd0
d667c63
91c3fae
0c1af79
151909a
5d728c7
a96e739
f720222
a858ce5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| // | ||
| //===---------------------------------------------------------------------===// | ||
|
|
||
| #include "clang/Basic/Cuda.h" | ||
| #include "clang/Basic/Version.h" | ||
| #include "llvm/ADT/MapVector.h" | ||
| #include "llvm/BinaryFormat/Magic.h" | ||
|
|
@@ -409,6 +410,46 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles, | |
|
|
||
| return *TempFileOrErr; | ||
| } | ||
|
|
||
| // ptxas binary | ||
| Expected<StringRef> ptxas(StringRef InputFile, const ArgList &Args, | ||
| StringRef Arch) { | ||
| llvm::TimeTraceScope TimeScope("NVPTX ptxas"); | ||
| // NVPTX uses the ptxas program to process assembly files. | ||
| Expected<std::string> PtxasPath = | ||
| findProgram("ptxas", {CudaBinaryPath + "/bin"}); | ||
| if (!PtxasPath) | ||
| return PtxasPath.takeError(); | ||
|
|
||
| llvm::Triple Triple( | ||
| Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); | ||
|
|
||
| // Create a new file to write the output to. | ||
| auto TempFileOrErr = | ||
| createOutputFile(sys::path::filename(ExecutableName), "cubin"); | ||
| if (!TempFileOrErr) | ||
| return TempFileOrErr.takeError(); | ||
|
|
||
| SmallVector<StringRef, 16> CmdArgs; | ||
| CmdArgs.push_back(*PtxasPath); | ||
| CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32"); | ||
| // Pass -v to ptxas if it was passed to the driver. | ||
| if (Args.hasArg(OPT_verbose)) | ||
| CmdArgs.push_back("-v"); | ||
| StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); | ||
| if (Args.hasArg(OPT_debug)) | ||
| CmdArgs.push_back("-g"); | ||
| else | ||
| CmdArgs.push_back(Args.MakeArgString("-" + OptLevel)); | ||
| CmdArgs.push_back("--gpu-name"); | ||
| CmdArgs.push_back(Arch); | ||
| CmdArgs.push_back("--output-file"); | ||
| CmdArgs.push_back(*TempFileOrErr); | ||
| CmdArgs.push_back(InputFile); | ||
| if (Error Err = executeCommands(*PtxasPath, CmdArgs)) | ||
| return std::move(Err); | ||
| return *TempFileOrErr; | ||
| } | ||
| } // namespace nvptx | ||
|
|
||
| namespace amdgcn { | ||
|
|
@@ -1240,7 +1281,8 @@ static Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles, | |
| } // namespace sycl | ||
|
|
||
| namespace generic { | ||
| Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { | ||
| Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args, | ||
| bool IsSYCLKind = false) { | ||
| llvm::TimeTraceScope TimeScope("Clang"); | ||
| // Use `clang` to invoke the appropriate device tools. | ||
| Expected<std::string> ClangPath = | ||
|
|
@@ -1276,6 +1318,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { | |
| if (!Triple.isNVPTX()) | ||
| CmdArgs.push_back("-Wl,--no-undefined"); | ||
|
|
||
| if (IsSYCLKind && Triple.isNVPTX()) | ||
|
||
| CmdArgs.push_back("-S"); | ||
| for (StringRef InputFile : InputFiles) | ||
| CmdArgs.push_back(InputFile); | ||
|
|
||
|
|
@@ -1369,7 +1413,7 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles, | |
| case Triple::ppc64: | ||
| case Triple::ppc64le: | ||
| case Triple::systemz: | ||
| return generic::clang(InputFiles, Args); | ||
| return generic::clang(InputFiles, Args, IsSYCLKind); | ||
| case Triple::spirv32: | ||
| case Triple::spirv64: | ||
| case Triple::spir: | ||
|
|
@@ -2078,14 +2122,40 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles( | |
| return OutputFile.takeError(); | ||
| WrappedOutput.push_back(*OutputFile); | ||
| } | ||
|
|
||
| for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { | ||
| SmallVector<StringRef> Files = {SplitModules[I].ModuleFilePath}; | ||
| auto LinkedFileFinalOrErr = | ||
| StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ); | ||
| if (Arch.empty()) | ||
| Arch = "native"; | ||
| SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles; | ||
| auto ClangOutputOrErr = | ||
| linkDevice(Files, LinkerArgs, true /* IsSYCLKind */); | ||
| if (!LinkedFileFinalOrErr) | ||
| return LinkedFileFinalOrErr.takeError(); | ||
| SplitModules[I].ModuleFilePath = *LinkedFileFinalOrErr; | ||
| if (!ClangOutputOrErr) | ||
| return ClangOutputOrErr.takeError(); | ||
| if (Triple.isNVPTX()) { | ||
| auto VirtualArch = StringRef(clang::CudaArchToVirtualArchString( | ||
| clang::StringToCudaArch(Arch))); | ||
| auto PtxasOutputOrErr = | ||
| nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch); | ||
| if (!PtxasOutputOrErr) | ||
| return PtxasOutputOrErr.takeError(); | ||
| BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch); | ||
| BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch); | ||
| auto BundledFileOrErr = | ||
| nvptx::fatbinary(BundlerInputFiles, LinkerArgs); | ||
| if (!BundledFileOrErr) | ||
| return BundledFileOrErr.takeError(); | ||
| SplitModules[I].ModuleFilePath = *BundledFileOrErr; | ||
| } else if (Triple.isAMDGCN()) { | ||
| BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch); | ||
| auto BundledFileOrErr = | ||
| amdgcn::fatbinary(BundlerInputFiles, LinkerArgs); | ||
| if (!BundledFileOrErr) | ||
| return BundledFileOrErr.takeError(); | ||
| SplitModules[I].ModuleFilePath = *BundledFileOrErr; | ||
| } else { | ||
| SplitModules[I].ModuleFilePath = *ClangOutputOrErr; | ||
| } | ||
| } | ||
| // TODO(NOM7): Remove this call and use community flow for bundle/wrap | ||
| auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| //==--- aot.cpp - Simple vector addition (AOT compilation example) --------==// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===---------------------------------------------------------------------===// | ||
|
|
||
| #include <sycl/detail/core.hpp> | ||
|
|
||
| #include <array> | ||
| #include <iostream> | ||
|
|
||
| constexpr sycl::access::mode sycl_read = sycl::access::mode::read; | ||
| constexpr sycl::access::mode sycl_write = sycl::access::mode::write; | ||
|
|
||
| template <typename T> class Vadd; | ||
|
|
||
| template <typename T, size_t N> | ||
| void vadd(const std::array<T, N> &A, const std::array<T, N> &B, | ||
| std::array<T, N> &C) { | ||
| sycl::queue Queue([](sycl::exception_list ExceptionList) { | ||
| for (std::exception_ptr ExceptionPtr : ExceptionList) { | ||
| try { | ||
| std::rethrow_exception(ExceptionPtr); | ||
| } catch (sycl::exception &E) { | ||
| std::cerr << E.what(); | ||
| } catch (...) { | ||
| std::cerr << "Unknown async exception was caught." << std::endl; | ||
| } | ||
| } | ||
| }); | ||
|
|
||
| sycl::range<1> numOfItems{N}; | ||
| sycl::buffer bufA(A.data(), numOfItems); | ||
| sycl::buffer bufB(B.data(), numOfItems); | ||
| sycl::buffer bufC(C.data(), numOfItems); | ||
|
|
||
| Queue.submit([&](sycl::handler &cgh) { | ||
| sycl::accessor accA{bufA, cgh, sycl::read_only}; | ||
| sycl::accessor accB{bufB, cgh, sycl::read_only}; | ||
| sycl::accessor accC{bufC, cgh, sycl::write_only}; | ||
|
|
||
| cgh.parallel_for<Vadd<T>>(numOfItems, [=](sycl::id<1> wiID) { | ||
| accC[wiID] = accA[wiID] + accB[wiID]; | ||
| }); | ||
| }); | ||
|
|
||
| Queue.wait_and_throw(); | ||
| } | ||
|
|
||
| int main() { | ||
| const size_t array_size = 4; | ||
| std::array<int, array_size> A = {{1, 2, 3, 4}}, B = {{1, 2, 3, 4}}, C; | ||
| std::array<float, array_size> D = {{1.f, 2.f, 3.f, 4.f}}, | ||
| E = {{1.f, 2.f, 3.f, 4.f}}, F; | ||
| vadd(A, B, C); | ||
| vadd(D, E, F); | ||
| for (unsigned int i = 0; i < array_size; i++) { | ||
| if (C[i] != A[i] + B[i]) { | ||
| std::cout << "Incorrect result (element " << i << " is " << C[i] << "!\n"; | ||
| return 1; | ||
| } | ||
| if (F[i] != D[i] + E[i]) { | ||
| std::cout << "Incorrect result (element " << i << " is " << F[i] << "!\n"; | ||
| return 1; | ||
| } | ||
| } | ||
| std::cout << "Correct result!\n"; | ||
| return 0; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| #include "split-per-source.h" | ||
|
||
|
|
||
| void runKernelsFromFile2() { | ||
| sycl::queue Q; | ||
| int Data = 0; | ||
| { | ||
| sycl::buffer<int, 1> Buf(&Data, sycl::range<1>(1)); | ||
| auto KernelID1 = sycl::get_kernel_id<File2Kern1>(); | ||
| auto KB = sycl::get_kernel_bundle<sycl::bundle_state::executable>( | ||
| Q.get_context(), {KernelID1}); | ||
| auto Krn = KB.get_kernel(KernelID1); | ||
|
|
||
| std::vector<sycl::kernel_id> KernelIDStorage = KB.get_kernel_ids(); | ||
| assert(KernelIDStorage.size() == 1); | ||
| assert(KernelIDStorage[0] == KernelID1); | ||
|
|
||
| Q.submit([&](sycl::handler &Cgh) { | ||
| auto Acc = Buf.get_access<sycl::access::mode::read_write>(Cgh); | ||
| Cgh.single_task<File2Kern1>(Krn, [=]() { Acc[0] = 3; }); | ||
| }); | ||
| } | ||
| assert(Data == 3); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| #include <sycl/detail/core.hpp> | ||
|
|
||
| class File1Kern1; | ||
| class File1Kern2; | ||
| class File2Kern1; | ||
|
|
||
| void runKernelsFromFile2(); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| // REQUIRES: opencl-aot, cpu | ||
|
||
|
|
||
| // Test with `--offload-new-driver` | ||
| // RUN: %clangxx -fsycl -fsycl-device-code-split=per_source -fsycl-targets=spir64_x86_64 -I %S/Inputs -o %t.out %S/split-per-source-main.cpp %S/Inputs/split-per-source-second-file.cpp \ | ||
| // RUN: -fsycl-dead-args-optimization --offload-new-driver | ||
|
||
| // RUN: %{run} %t.out | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| // REQUIRES: ocloc, gpu | ||
| // UNSUPPORTED: cuda || hip | ||
| // CUDA does neither support device code splitting nor SPIR. | ||
| // Test with `--offload-new-driver` | ||
| // | ||
| // RUN: %clangxx -fsycl -fsycl-device-code-split=per_source \ | ||
| // RUN: -fsycl-targets=spir64_gen \ | ||
| // RUN: -Xsycl-target-backend=spir64_gen \ | ||
| // RUN: "-device tgllp" -I %S/Inputs -o %t.out \ | ||
| // RUN: %S/split-per-source-main.cpp \ | ||
| // RUN: %S/Inputs/split-per-source-second-file.cpp \ | ||
| // RUN: -fsycl-dead-args-optimization --offload-new-driver | ||
| // RUN: %{run} %t.out |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| //==--- cpu.cpp - AOT compilation for cpu devices using opencl-aot --------==// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===---------------------------------------------------------------------===// | ||
|
|
||
| // REQUIRES: opencl-aot, cpu | ||
|
||
|
|
||
| // Test with `--offload-new-driver` | ||
| // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -o %t.out | ||
| // RUN: %{run} %t.out | ||
|
|
||
| // Test that opencl-aot can handle multiple build options. | ||
| // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--bo=-g" -Xsycl-target-backend "--bo=-cl-opt-disable" -o %t2.out | ||
|
|
||
| // Test that opencl-aot can handle march option. | ||
| // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--march=avx512" | ||
| // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--march=wsm" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: if we are returning, do we need to
std::move?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see that std::move has been used wherever we are returning Err. I followed the trend.