-
Notifications
You must be signed in to change notification settings - Fork 803
[SYCL] Split device images based on accuracy level provided in option #10140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
94ac8d5
f235c44
649fd15
404f82e
d107740
a537ca9
edf01dd
c4b0c56
756690c
4dd7de1
9408518
2fb28d3
9807f64
a13c1ff
c3afa41
8e025fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |
| #include "CGObjCRuntime.h" | ||
| #include "CGOpenCLRuntime.h" | ||
| #include "CGRecordLayout.h" | ||
| #include "CGSYCLRuntime.h" | ||
| #include "CodeGenFunction.h" | ||
| #include "CodeGenModule.h" | ||
| #include "ConstantEmitter.h" | ||
|
|
@@ -513,12 +514,17 @@ static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name, | |
| // TODO: Replace AttrList with a single attribute. The call can only have a | ||
| // single FPAccuracy attribute. | ||
| llvm::AttributeList AttrList; | ||
| // "sycl_used_aspects" metadata associated with the call. | ||
| SmallVector<llvm::Metadata *, 4> AspectsMD; | ||
|
||
| // sincos() doesn't return a value, but it still has a type associated with | ||
| // it that corresponds to the operand type. | ||
| CGF.CGM.getFPAccuracyFuncAttributes( | ||
| Name, AttrList, ID, | ||
| Name, AttrList, AspectsMD, ID, | ||
| Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType()); | ||
| CI->setAttributes(AttrList); | ||
| if (!AspectsMD.empty()) | ||
| CI->setMetadata("sycl_used_aspects", | ||
| llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectsMD)); | ||
| return CI; | ||
| } | ||
|
|
||
|
|
@@ -22144,7 +22150,8 @@ llvm::CallInst *CodeGenFunction::EmitFPBuiltinIndirectCall( | |
| // Even if the current function doesn't have a clang builtin, create | ||
| // an 'fpbuiltin-max-error' attribute for it; unless it's marked with | ||
| // an NoBuiltin attribute. | ||
| if (!FD->hasAttr<NoBuiltinAttr>()) { | ||
| if (!FD->hasAttr<NoBuiltinAttr>() && | ||
| FD->getNameInfo().getName().isIdentifier()) { | ||
|
||
| Name = FD->getName(); | ||
| FPAccuracyIntrinsicID = | ||
| llvm::StringSwitch<unsigned>(Name) | ||
|
|
@@ -22155,7 +22162,11 @@ llvm::CallInst *CodeGenFunction::EmitFPBuiltinIndirectCall( | |
| .Case("frem", llvm::Intrinsic::fpbuiltin_frem) | ||
| .Case("sincos", llvm::Intrinsic::fpbuiltin_sincos) | ||
| .Case("exp10", llvm::Intrinsic::fpbuiltin_exp10) | ||
| .Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt); | ||
| .Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt) | ||
| .Default(0); | ||
| if (!FPAccuracyIntrinsicID) { | ||
|
||
| return nullptr; | ||
| } | ||
| } else { | ||
| return nullptr; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| #include "CGCXXABI.h" | ||
| #include "CGCleanup.h" | ||
| #include "CGRecordLayout.h" | ||
| #include "CGSYCLRuntime.h" | ||
| #include "CodeGenFunction.h" | ||
| #include "CodeGenModule.h" | ||
| #include "TargetInfo.h" | ||
|
|
@@ -1846,8 +1847,18 @@ static llvm::fp::FPAccuracy convertFPAccuracy(StringRef FPAccuracyStr) { | |
| .Case("cuda", llvm::fp::FPAccuracy::CUDA); | ||
| } | ||
|
|
||
| static int32_t convertFPAccuracyToAspect(StringRef FPAccuracyStr) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to add an assert here to ensure this function is called with appropriate FPAccuracyStr? Thanks
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added assert, thanks. |
||
| return llvm::StringSwitch<int32_t>(FPAccuracyStr) | ||
| .Case("high", SYCLInternalAspect::fp_intrinsic_accuracy_high) | ||
| .Case("medium", SYCLInternalAspect::fp_intrinsic_accuracy_medium) | ||
| .Case("low", SYCLInternalAspect::fp_intrinsic_accuracy_low) | ||
| .Case("sycl", SYCLInternalAspect::fp_intrinsic_accuracy_sycl) | ||
| .Case("cuda", SYCLInternalAspect::fp_intrinsic_accuracy_cuda); | ||
| } | ||
|
|
||
| void CodeGenModule::getDefaultFunctionFPAccuracyAttributes( | ||
| StringRef Name, llvm::AttrBuilder &FuncAttrs, unsigned ID, | ||
| StringRef Name, llvm::AttrBuilder &FuncAttrs, | ||
| SmallVector<llvm::Metadata *, 4> &MDs, unsigned ID, | ||
| const llvm::Type *FuncType) { | ||
| // Priority is given to to the accuracy specific to the function. | ||
| // So, if the command line is something like this: | ||
|
|
@@ -1864,6 +1875,9 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes( | |
| ID, FuncType, convertFPAccuracy(FuncMapIt->second)); | ||
| assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected"); | ||
| FuncAttrs.addAttribute("fpbuiltin-max-error=", FPAccuracyVal); | ||
| if (getLangOpts().SYCLIsDevice) | ||
|
||
| MDs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( | ||
| Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)))); | ||
| } | ||
| } | ||
| if (FuncAttrs.attrs().size() == 0) | ||
|
|
@@ -1872,6 +1886,9 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes( | |
| ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal)); | ||
| assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected"); | ||
| FuncAttrs.addAttribute("fpbuiltin-max-error=", FPAccuracyVal); | ||
| if (getLangOpts().SYCLIsDevice) | ||
| MDs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( | ||
| Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)))); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -5620,8 +5637,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, | |
| // Emit the actual call/invoke instruction. | ||
| llvm::CallBase *CI; | ||
| if (!InvokeDest) { | ||
| if (!getLangOpts().FPAccuracyFuncMap.empty() || | ||
| !getLangOpts().FPAccuracyVal.empty()) { | ||
| if ((!getLangOpts().FPAccuracyFuncMap.empty() || | ||
| !getLangOpts().FPAccuracyVal.empty()) && | ||
| isa_and_nonnull<FunctionDecl>(TargetDecl)) { | ||
|
||
| const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl); | ||
| assert(FD && "expecting a function"); | ||
| CI = EmitFPBuiltinIndirectCall(IRFuncTy, IRCallArgs, CalleePtr, FD); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| // RUN: %clang_cc1 -fsycl-is-device -ffp-builtin-accuracy=high:sin,sqrt -ffp-builtin-accuracy=medium:cos -ffp-builtin-accuracy=low:tan -ffp-builtin-accuracy=cuda:exp,acos -ffp-builtin-accuracy=sycl:log,asin -emit-llvm -triple spir64-unknown-unknown -disable-llvm-passes %s -o - | FileCheck %s | ||
|
||
|
|
||
| // Tests that sycl_used_aspects metadata is attached to the fpbuiltin call based on -ffp-accuracy option. | ||
|
|
||
| #include "Inputs/sycl.hpp" | ||
Fznamznon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| extern "C" SYCL_EXTERNAL double sin(double); | ||
| extern "C" SYCL_EXTERNAL double cos(double); | ||
| extern "C" SYCL_EXTERNAL double tan(double); | ||
| extern "C" SYCL_EXTERNAL double log(double); | ||
| extern "C" SYCL_EXTERNAL double exp(double); | ||
| extern "C" SYCL_EXTERNAL double acos(double); | ||
| extern "C" SYCL_EXTERNAL double asin(double); | ||
| extern "C" SYCL_EXTERNAL double sqrt(double); | ||
|
|
||
| using namespace sycl; | ||
|
|
||
| int main() { | ||
| const unsigned array_size = 4; | ||
| double Value = .5; | ||
| queue deviceQueue; | ||
| range<1> numOfItems{array_size}; | ||
|
|
||
| // Kernel0 doesn't use math functions. | ||
| deviceQueue.submit([&](handler& cgh) { | ||
| cgh.parallel_for<class Kernel0>(numOfItems, | ||
| [=](id<1> wiID) { | ||
| (void)Value; | ||
| }); | ||
| }); | ||
|
|
||
| // Kernel1 uses high-accuracy sin. | ||
| deviceQueue.submit([&](handler& cgh) { | ||
| cgh.parallel_for<class Kernel1>(numOfItems, | ||
| [=](id<1> wiID) { | ||
| // CHECK: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[ASPECT1:[0-9]+]] | ||
| (void)sin(Value); | ||
| }); | ||
| }); | ||
|
|
||
| deviceQueue.submit([&](handler& cgh) { | ||
| cgh.parallel_for<class Kernel2>(numOfItems, | ||
| [=](id<1> wiID) { | ||
| // CHECK: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[ASPECT2:[0-9]+]] | ||
| (void)cos(Value); | ||
| }); | ||
| }); | ||
|
|
||
| // Kernel3 uses low-accuracy tan. | ||
| deviceQueue.submit([&](handler& cgh) { | ||
| cgh.parallel_for<class Kernel3>(numOfItems, | ||
| [=](id<1> wiID) { | ||
| // CHECK: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[ASPECT3:[0-9]+]] | ||
| (void)tan(Value); | ||
| }); | ||
| }); | ||
|
|
||
| // Kernel4 uses cuda-accuracy exp and sycl-accuracy log. | ||
| deviceQueue.submit([&](handler& cgh) { | ||
| cgh.parallel_for<class Kernel4>(numOfItems, | ||
| [=](id<1> wiID) { | ||
| // CHECK: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[ASPECT4:[0-9]+]] | ||
| // CHECK: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[ASPECT5:[0-9]+]] | ||
| (void)log(exp(Value)); | ||
| }); | ||
| }); | ||
| deviceQueue.wait(); | ||
|
|
||
| // Kernel5 uses cuda-accuracy acos. | ||
| deviceQueue.submit([&](handler& cgh) { | ||
| cgh.parallel_for<class Kernel5>(numOfItems, | ||
| [=](id<1> wiID) { | ||
| // CHECK: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[ASPECT4:[0-9]+]] | ||
| (void)acos(Value); | ||
| }); | ||
| }); | ||
|
|
||
| // Kernel6 uses sycl-accuracy asin. | ||
| deviceQueue.submit([&](handler& cgh) { | ||
| cgh.parallel_for<class Kernel6>(numOfItems, | ||
| [=](id<1> wiID) { | ||
| // CHECK: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[ASPECT5:[0-9]+]] | ||
| (void)asin(Value); | ||
| }); | ||
| }); | ||
|
|
||
| // Kernel7 uses high-accuracy sqrt. | ||
| deviceQueue.submit([&](handler& cgh) { | ||
| cgh.parallel_for<class Kernel7>(numOfItems, | ||
| [=](id<1> wiID) { | ||
| // CHECK: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[ASPECT1:[0-9]+]] | ||
| (void)sqrt(Value); | ||
| }); | ||
| }); | ||
| return 0; | ||
| } | ||
|
|
||
| // CHECK: [[ASPECT1]] = !{i32 -1} | ||
| // CHECK: [[ASPECT2]] = !{i32 -2} | ||
| // CHECK: [[ASPECT3]] = !{i32 -3} | ||
| // CHECK: [[ASPECT4]] = !{i32 -5} | ||
|
||
| // CHECK: [[ASPECT5]] = !{i32 -4} | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you need this include here? I don't see you using internal aspects in this file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed, removed.