[SPIR-V] Enable variadic function lowering for the SPIR-V target #175076
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-llvm-transforms

Author: Joseph Huber (jhuber6)

Changes

Summary:
We support variadic functions in AMDGPU / NVPTX via an LLVM-IR pass. This patch applies the same handling here to support them on this target.

I am unsure what the ABI should look like here; I have mostly copied the one we use for NVPTX, where it's basically a struct layout with natural alignment. This wastes some space, which is why AMDGPU does not pad them.

Additionally, this required allowing the SPIR_FUNC calling convention. I'm assuming this is compatible with the C calling convention in IR, but I will need someone to confirm that for me.

Full diff: https://github.com/llvm/llvm-project/pull/175076.diff

6 Files Affected:
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 6c6c4794bba49..ba90ab3e67053 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -36,6 +36,8 @@ class SPIRVABIInfo : public CommonSPIRABIInfo {
public:
SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
void computeInfo(CGFunctionInfo &FI) const override;
+ RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
+ AggValueSlot Slot) const override;
private:
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
@@ -207,6 +209,11 @@ void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
// arguments handling.
llvm::CallingConv::ID CC = FI.getCallingConvention();
+ for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
+ I.info = ArgumentsCount < FI.getNumRequiredArgs()
+ ? classifyArgumentType(I.type)
+ : ABIArgInfo::getDirect();
+
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
@@ -219,6 +226,14 @@ void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
}
}
+RValue SPIRVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty, AggValueSlot Slot) const {
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(1),
+ /*AllowHigherAlign=*/true, Slot);
+}
+
unsigned AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const {
// This duplicates the AMDGPUABI computation.
unsigned NumRegs = 0;
diff --git a/clang/test/CodeGenSPIRV/Builtins/variadic.c b/clang/test/CodeGenSPIRV/Builtins/variadic.c
new file mode 100644
index 0000000000000..64915a3f0a8ae
--- /dev/null
+++ b/clang/test/CodeGenSPIRV/Builtins/variadic.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple spirv64 -emit-llvm -o - %s | FileCheck %s
+
+extern void varargs_simple(int, ...);
+
+void foo() {
+ char c = '\x1';
+ short s = 1;
+ int i = 1;
+ long l = 1;
+ float f = 1.f;
+ double d = 1.;
+ varargs_simple(0, c, s, i, l, f, d);
+
+ struct {int x; char c; int y;} a = {1, '\x1', 1};
+ varargs_simple(0, a);
+
+ typedef int __attribute__((ext_vector_type(4))) int4;
+ int4 v = {1, 1, 1, 1};
+ varargs_simple(0, v);
+
+ struct {char c, d;} t;
+ varargs_simple(0, t, t, 0, t);
+}
+
+typedef struct {long x; long y;} S;
+extern void varargs_complex(S, S, ...);
+
+void bar() {
+ S s = {1l, 1l};
+ varargs_complex(s, s, 1, 1l, 1.0);
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 9a2b0771e4dc0..adaf34b897ab3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -1020,12 +1020,6 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction(
const FunctionType *Ty, SPIRVType *RetType,
const SmallVectorImpl<SPIRVType *> &ArgTypes,
MachineIRBuilder &MIRBuilder) {
- if (Ty->isVarArg()) {
- Function &Fn = MIRBuilder.getMF().getFunction();
- Ty->getContext().diagnose(DiagnosticInfoUnsupported(
- Fn, "SPIR-V does not support variadic functions",
- MIRBuilder.getDebugLoc()));
- }
return createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeFunction)
.addDef(createTypeVReg(MIRBuilder))
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index 10038753f4a75..4769d6a92eba4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -33,6 +33,7 @@
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include <optional>
@@ -178,6 +179,7 @@ void SPIRVPassConfig::addIRPasses() {
addPass(createSPIRVRegularizerPass());
addPass(createSPIRVPrepareFunctionsPass(TM));
addPass(createSPIRVPrepareGlobalsPass());
+ addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
}
void SPIRVPassConfig::addISelPrepare() {
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
index 4863d6ba789a8..6a92a0dca4d37 100644
--- a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -230,7 +230,8 @@ class ExpandVariadics : public ModulePass {
F->hasFnAttribute(Attribute::Naked))
return false;
- if (F->getCallingConv() != CallingConv::C)
+ if (F->getCallingConv() != CallingConv::C &&
+ F->getCallingConv() != CallingConv::SPIR_FUNC)
return false;
if (rewriteABI())
@@ -249,7 +250,8 @@ class ExpandVariadics : public ModulePass {
return false;
}
- if (CI->getCallingConv() != CallingConv::C)
+ if (CI->getCallingConv() != CallingConv::C &&
+ CI->getCallingConv() != CallingConv::SPIR_FUNC)
return false;
return true;
@@ -940,6 +942,33 @@ struct NVPTX final : public VariadicABIInfo {
}
};
+struct SPIRV final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext &Ctx) override {
+ return PointerType::getUnqual(Ctx);
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst *, Value *Buffer) override {
+ return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
+ }
+
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ // Expects natural alignment in all cases. The variadic call ABI will handle
+ // promoting types to their appropriate size and alignment.
+ Align A = DL.getABITypeAlign(Parameter);
+ return {A, false};
+ }
+};
+
struct Wasm final : public VariadicABIInfo {
bool enableForTarget() override {
@@ -995,6 +1024,11 @@ std::unique_ptr<VariadicABIInfo> VariadicABIInfo::create(const Triple &T) {
return std::make_unique<NVPTX>();
}
+ case Triple::spirv:
+ case Triple::spirv64: {
+ return std::make_unique<SPIRV>();
+ }
+
default:
return {};
}
diff --git a/llvm/test/CodeGen/SPIRV/function/variadics-lowering.ll b/llvm/test/CodeGen/SPIRV/function/variadics-lowering.ll
new file mode 100644
index 0000000000000..5ce4414c65ada
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/function/variadics-lowering.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=spirv64-- --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+
+%struct.S = type { i64, i64 }
+%struct.anon = type { i32, i8, i32 }
+%struct.anon.0 = type { i8, i8 }
+
+@__const.foo.a = private unnamed_addr addrspace(1) constant { i32, i8, [3 x i8], i32 } { i32 1, i8 1, [3 x i8] zeroinitializer, i32 1 }, align 4
+@__const.bar.s = private unnamed_addr addrspace(1) constant %struct.S { i64 1, i64 1 }, align 8
+
+define spir_func void @foo() {
+; CHECK-LABEL: define spir_func void @foo() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[C:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[S:%.*]] = alloca i16, align 2
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[L:%.*]] = alloca i64, align 8
+; CHECK-NEXT: [[F:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[D:%.*]] = alloca double, align 8
+; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4
+; CHECK-NEXT: [[V:%.*]] = alloca <4 x i32>, align 16
+; CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1
+; CHECK-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FOO_VARARG:%.*]], align 4
+; CHECK-NEXT: [[VARARG_BUFFER1:%.*]] = alloca [[FOO_VARARG_0:%.*]], align 16
+; CHECK-NEXT: [[VARARG_BUFFER2:%.*]] = alloca [[FOO_VARARG_1:%.*]], align 4
+; CHECK-NEXT: [[VARARG_BUFFER3:%.*]] = alloca [[FOO_VARARG_2:%.*]], align 8
+; CHECK-NEXT: [[VARARG_BUFFER4:%.*]] = alloca [[FOO_VARARG_3:%.*]], align 8
+; CHECK-NEXT: store i8 1, ptr [[C]], align 1
+; CHECK-NEXT: store i16 1, ptr [[S]], align 2
+; CHECK-NEXT: store i32 1, ptr [[I]], align 4
+; CHECK-NEXT: store i64 1, ptr [[L]], align 8
+; CHECK-NEXT: store float 1.000000e+00, ptr [[F]], align 4
+; CHECK-NEXT: store double 1.000000e+00, ptr [[D]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[C]], align 1
+; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[S]], align 2
+; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[L]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F]], align 4
+; CHECK-NEXT: [[CONV2:%.*]] = fpext float [[TMP4]] to double
+; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[D]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VARARG_BUFFER3]])
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_2]], ptr [[VARARG_BUFFER3]], i32 0, i32 0
+; CHECK-NEXT: store i32 [[CONV]], ptr [[TMP13]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_2]], ptr [[VARARG_BUFFER3]], i32 0, i32 1
+; CHECK-NEXT: store i32 [[CONV1]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_2]], ptr [[VARARG_BUFFER3]], i32 0, i32 2
+; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_2]], ptr [[VARARG_BUFFER3]], i32 0, i32 4
+; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_2]], ptr [[VARARG_BUFFER3]], i32 0, i32 5
+; CHECK-NEXT: store double [[CONV2]], ptr [[TMP10]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_2]], ptr [[VARARG_BUFFER3]], i32 0, i32 6
+; CHECK-NEXT: store double [[TMP5]], ptr [[TMP11]], align 8
+; CHECK-NEXT: call spir_func void @varargs_simple(i32 0, ptr [[VARARG_BUFFER3]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VARARG_BUFFER3]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr align 4 [[A]], ptr addrspace(1) align 4 @__const.foo.a, i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VARARG_BUFFER2]])
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_1]], ptr [[VARARG_BUFFER2]], i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP12]], ptr [[A]], i64 12, i1 false)
+; CHECK-NEXT: call spir_func void @varargs_simple(i32 0, ptr [[VARARG_BUFFER2]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VARARG_BUFFER2]])
+; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[V]], align 16
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[V]], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VARARG_BUFFER1]])
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 0
+; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP14]], align 16
+; CHECK-NEXT: call spir_func void @varargs_simple(i32 0, ptr [[VARARG_BUFFER1]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VARARG_BUFFER1]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VARARG_BUFFER]])
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[FOO_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP15]], ptr [[T]], i64 2, i1 false)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[FOO_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP16]], ptr [[T]], i64 2, i1 false)
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[FOO_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[FOO_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP18]], ptr [[T]], i64 2, i1 false)
+; CHECK-NEXT: call spir_func void @varargs_simple(i32 0, ptr [[VARARG_BUFFER]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VARARG_BUFFER]])
+; CHECK-NEXT: [[R:%.*]] = alloca [[STRUCT_S:%.*]], align 8
+; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr align 8 [[R]], ptr addrspace(1) align 8 @__const.bar.s, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VARARG_BUFFER4]])
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_3]], ptr [[VARARG_BUFFER4]], i32 0, i32 0
+; CHECK-NEXT: store i32 1, ptr [[TMP19]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_3]], ptr [[VARARG_BUFFER4]], i32 0, i32 2
+; CHECK-NEXT: store i64 1, ptr [[TMP20]], align 8
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[FOO_VARARG_3]], ptr [[VARARG_BUFFER4]], i32 0, i32 3
+; CHECK-NEXT: store double 1.000000e+00, ptr [[TMP21]], align 8
+; CHECK-NEXT: call spir_func void @varargs_complex(ptr byval([[STRUCT_S]]) align 8 [[R]], ptr byval([[STRUCT_S]]) align 8 [[S]], ptr [[VARARG_BUFFER4]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VARARG_BUFFER4]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %c = alloca i8, align 1
+ %s = alloca i16, align 2
+ %i = alloca i32, align 4
+ %l = alloca i64, align 8
+ %f = alloca float, align 4
+ %d = alloca double, align 8
+ %a = alloca %struct.anon, align 4
+ %v = alloca <4 x i32>, align 16
+ %t = alloca %struct.anon.0, align 1
+ store i8 1, ptr %c, align 1
+ store i16 1, ptr %s, align 2
+ store i32 1, ptr %i, align 4
+ store i64 1, ptr %l, align 8
+ store float 1.000000e+00, ptr %f, align 4
+ store double 1.000000e+00, ptr %d, align 8
+ %0 = load i8, ptr %c, align 1
+ %conv = sext i8 %0 to i32
+ %1 = load i16, ptr %s, align 2
+ %conv1 = sext i16 %1 to i32
+ %2 = load i32, ptr %i, align 4
+ %3 = load i64, ptr %l, align 8
+ %4 = load float, ptr %f, align 4
+ %conv2 = fpext float %4 to double
+ %5 = load double, ptr %d, align 8
+ call spir_func void (i32, ...) @varargs_simple(i32 0, i32 %conv, i32 %conv1, i32 %2, i64 %3, double %conv2, double %5)
+ call void @llvm.memcpy.p0.p1.i64(ptr align 4 %a, ptr addrspace(1) align 4 @__const.foo.a, i64 12, i1 false)
+ call spir_func void (i32, ...) @varargs_simple(i32 0, ptr byval(%struct.anon) align 4 %a)
+ store <4 x i32> splat (i32 1), ptr %v, align 16
+ %6 = load <4 x i32>, ptr %v, align 16
+ call spir_func void (i32, ...) @varargs_simple(i32 0, <4 x i32> %6)
+ call spir_func void (i32, ...) @varargs_simple(i32 0, ptr byval(%struct.anon.0) align 1 %t, ptr byval(%struct.anon.0) align 1 %t, i32 0, ptr byval(%struct.anon.0) align 1 %t)
+ %r = alloca %struct.S, align 8
+ call void @llvm.memcpy.p0.p1.i64(ptr align 8 %r, ptr addrspace(1) align 8 @__const.bar.s, i64 16, i1 false)
+ call spir_func void (ptr, ptr, ...) @varargs_complex(ptr byval(%struct.S) align 8 %r, ptr byval(%struct.S) align 8 %s, i32 1, i64 1, double 1.000000e+00)
+ ret void
+}
+
+declare spir_func void @varargs_simple(i32 noundef, ...)
+
+declare spir_func void @varargs_complex(ptr byval(%struct.S) align 8, ptr byval(%struct.S) align 8, ...)
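Not part of the patch: the clang test above only exercises call sites, so as context, here is a minimal C sketch of the callee-side code this lowering enables, under the assumption from the summary that the va_list degrades to a raw pointer into the packed argument buffer. The function name is illustrative.

#include <stdarg.h>

/* After lowering, 'ap' is a plain pointer that walks the packed argument
 * buffer built at each call site; each va_arg becomes an aligned load plus
 * a pointer bump (emitVoidPtrVAArg on the clang side). */
int sum(int count, ...) {
  va_list ap;
  va_start(ap, count);
  int total = 0;
  for (int i = 0; i < count; ++i)
    total += va_arg(ap, int);
  va_end(ap);
  return total;
}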
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed.

🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed.
- if (CI->getCallingConv() != CallingConv::C)
+ if (CI->getCallingConv() != CallingConv::C &&
+     CI->getCallingConv() != CallingConv::SPIR_FUNC)
Unrelated but this is bugged and should handle coldcc and the other ccc-alias-like ccs
Is it bugged or just overly restrictive? I'm not sure which calling conventions are strictly compatible with C as a whole.
Given we rely on this for lowering, that's a bug
Alright, could make a separate PR to check for compatible ones.
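A possible shape for that follow-up, sketched here purely as an assumption rather than a plan; CallingConv::Cold is the coldcc flagged above, and which conventions are genuinely C-compatible remains target-dependent:

#include "llvm/IR/CallingConv.h"
using namespace llvm;

// Hypothetical helper centralizing the check the pass currently repeats at
// the function and call-site level. Not part of this patch.
static bool isCCompatibleCallConv(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Cold:      // coldcc, flagged in review as ccc-alias-like
  case CallingConv::SPIR_FUNC: // added by this patch
    return true;
  default:
    return false;
  }
}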
✅ With the latest revision this PR passed the C/C++ code formatter.
This currently fails the

Variadics support as (hopefully) provided by this PR would let you just write a
> where it's basically a struct layout with natural alignment

Frankly, that would be my preferred option as well. The only thing that I'm not really comfortable answering is what the layout of such a structure should be for Shader SPIR-V coming from HLSL. That's why I'd like to hear from @Keenuts. Maybe it's worth disabling this pass outside of the SPIR-V-for-compute compilation path.
; CHECK-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FOO_VARARG:%.*]], align 4
; CHECK-NEXT: [[VARARG_BUFFER1:%.*]] = alloca [[FOO_VARARG_0:%.*]], align 16
; CHECK-NEXT: [[VARARG_BUFFER2:%.*]] = alloca [[FOO_VARARG_1:%.*]], align 4
; CHECK-NEXT: [[VARARG_BUFFER3:%.*]] = alloca [[FOO_VARARG_2:%.*]], align 8
; CHECK-NEXT: [[VARARG_BUFFER4:%.*]] = alloca [[FOO_VARARG_3:%.*]], align 8
Let's spell out the checks for the types explicitly; it's a bit hard to reconstruct the vararg structure type (for me as a reviewer) looking at GEPs followed by loads/stores. Meanwhile, ensuring that the type was created correctly is crucial.
This is generated code so I think it just numbers them like this. I could try to give them their original names but it's a little tough because of how much code I'll need to generate to be thorough.
Apologies, I didn't mean to suggest renaming these variables. My consideration is: aren't these lines in LLVM IR something like

%VARARG_BUFFER = alloca { i32, i32, i32, i64 ... }, align ...?

If so, I'd suggest just hard-coding the type in the allocas and GEPs in place of the FOO_VARARG variables. Otherwise, if the type is defined at the top of the module, even better: we can define FOO_VARARG_%number% using the top-level type declaration.
I tried to make the test easier to follow while still covering the core ABI types. Is this better?
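For what it's worth, an explicit check along the lines MrSidims describes might look like the sketch below. The element list is inferred from the GEP indices and stores in the checks above (index 3 being skipped suggests a 4-byte padding slot before the i64); the literal type name and packedness are assumptions, not copied from the pass output.

; Hypothetical explicit type check for the first varargs_simple call:
; CHECK: %foo.vararg = type <{ i32, i32, i32, [4 x i8], i64, double, double }>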
Is there an easy way to detect this? Like a module flag?

Maybe this?
Ping, we no longer do this for shaders and the lowering should be the same as the well-tested PTX version.
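For reference, a minimal sketch of how such a shader gate could look in SPIRVPassConfig::addIRPasses, assuming the Vulkan OS component of the triple is what distinguishes shader compilations; the exact predicate is an assumption, not necessarily what landed:

void SPIRVPassConfig::addIRPasses() {
  // ... existing passes elided ...
  // Only lower variadics for compute-style SPIR-V; Vulkan (shader) triples
  // skip the pass entirely. The predicate here is illustrative.
  if (TM.getTargetTriple().getOS() != Triple::Vulkan)
    addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
}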
JonChesterfield left a comment
Implementation looks sound. Khronos might have strong feelings that they want a different ABI, in which case we could change this to match their preference later, though the pointer to contiguous memory does seem uncontentious.
We seem to be missing the x64 and aarch64 paths (would make static variadic wrappers free), nasty feeling they were on the dev machine I returned to AMD. Will see if I can find a few hours to reimplement them.
MrSidims left a comment
LGTM
farzonl left a comment
Looking specifically to make sure this won't impact HLSL shaders. Looks like it has sufficient checks for that, so LGTM.
LLVM Buildbot has detected a new failure on builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/169/builds/19095

Here is the relevant piece of the build log for reference: