Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ade97f5
Stream handing implementation - PR2268
zahiraam Apr 28, 2021
9a60e02
Stream handing implementation - PR2268
zahiraam Apr 28, 2021
eb0b35d
Stream handing implementation - code from draft PR2268
zahiraam Apr 29, 2021
d9f7e79
Stream handing implementation - code from draft PR2268
zahiraam Apr 29, 2021
ac64f6f
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam Apr 29, 2021
d504662
Adding finalize function
zahiraam Apr 29, 2021
cf327e5
Adding finalize function
zahiraam Apr 29, 2021
75d0e57
Review comments fixes
zahiraam May 3, 2021
a54e368
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 3, 2021
afdc281
Review comments fixes
zahiraam May 4, 2021
63efc9d
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 4, 2021
64544b9
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 5, 2021
2bd73f4
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 6, 2021
872f494
Review comments fixes
zahiraam May 6, 2021
e979fe1
Review comments fixes
zahiraam May 6, 2021
c3e0dcc
Review comments fixes
zahiraam May 6, 2021
45cd12f
Review comments fixes
zahiraam May 6, 2021
d2e42e5
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 7, 2021
8dad222
Completed resolving conflict
zahiraam May 7, 2021
ea58203
Completed resolving conflict
zahiraam May 7, 2021
41f9f12
Fixed resolution conflict
zahiraam May 7, 2021
fe5935d
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 17, 2021
98220c3
Fixed LIT test
zahiraam May 18, 2021
5a1355c
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 18, 2021
4b7c9d9
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 19, 2021
0fd8564
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 25, 2021
1dbf04c
Fixing ESIMD test failures
zahiraam May 25, 2021
02e5c06
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 25, 2021
d5f0fea
Fixing LIT failure
zahiraam May 25, 2021
ab02a86
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 26, 2021
84dc696
Fix after review
zahiraam May 26, 2021
772cb8a
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 27, 2021
59047b7
Fix after review and renamed SemaSYCL/streams.cpp to SemaSYCL/stream.cpp
zahiraam May 27, 2021
71d1c57
Merge remote-tracking branch 'remote/sycl' into stream-class
zahiraam May 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,8 @@ class SYCLIntegrationHeader {
kind_sampler,
kind_pointer,
kind_specialization_constants_buffer,
kind_last = kind_specialization_constants_buffer
kind_stream,
kind_last = kind_stream
};

public:
Expand Down
98 changes: 7 additions & 91 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1025,23 +1025,6 @@ class KernelObjVisitor {
VisitRecordFields(Owner, Handlers...);
}

// FIXME: Can this be refactored/handled some other way?
template <typename ParentTy, typename... HandlerTys>
void visitStreamRecord(const CXXRecordDecl *Owner, ParentTy &Parent,
CXXRecordDecl *Wrapper, QualType RecordTy,
HandlerTys &... Handlers) {
(void)std::initializer_list<int>{
(Handlers.enterStream(Owner, Parent, RecordTy), 0)...};
for (const auto &Field : Wrapper->fields()) {
QualType FieldTy = Field->getType();
// Required to initialize accessors inside streams.
if (Util::isSyclAccessorType(FieldTy))
KF_FOR_EACH(handleSyclAccessorType, Field, FieldTy);
}
(void)std::initializer_list<int>{
(Handlers.leaveStream(Owner, Parent, RecordTy), 0)...};
}

template <typename... HandlerTys>
void visitArrayElementImpl(const CXXRecordDecl *Owner, FieldDecl *ArrayField,
QualType ElementTy, uint64_t Index,
Expand Down Expand Up @@ -1116,12 +1099,9 @@ class KernelObjVisitor {
KF_FOR_EACH(handleSyclHalfType, Field, FieldTy);
else if (Util::isSyclSpecConstantType(FieldTy))
KF_FOR_EACH(handleSyclSpecConstantType, Field, FieldTy);
else if (Util::isSyclStreamType(FieldTy)) {
CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl();
// Handle accessors in stream class.
else if (Util::isSyclStreamType(FieldTy))
KF_FOR_EACH(handleSyclStreamType, Field, FieldTy);
visitStreamRecord(Owner, Field, RD, FieldTy, Handlers...);
} else if (FieldTy->isStructureOrClassType()) {
else if (FieldTy->isStructureOrClassType()) {
if (KF_FOR_EACH(handleStructType, Field, FieldTy)) {
CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl();
visitRecord(Owner, Field, RD, FieldTy, Handlers...);
Expand Down Expand Up @@ -1235,12 +1215,6 @@ class SyclKernelFieldHandlerBase {
virtual bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) {
return true;
}
virtual bool enterStream(const CXXRecordDecl *, FieldDecl *, QualType) {
return true;
}
virtual bool leaveStream(const CXXRecordDecl *, FieldDecl *, QualType) {
return true;
}
virtual bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &,
QualType) {
return true;
Expand Down Expand Up @@ -1688,18 +1662,6 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler {
return true;
}

// Stream is always decomposed (and whether it gets decomposed is handled in
// handleSyclStreamType), but we need a CollectionStack entry to capture the
// accessors that get handled.
bool enterStream(const CXXRecordDecl *, FieldDecl *, QualType) final {
CollectionStack.push_back(false);
return true;
}
bool leaveStream(const CXXRecordDecl *, FieldDecl *, QualType Ty) final {
CollectionStack.pop_back();
return true;
}

bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final {
CollectionStack.push_back(false);
return true;
Expand Down Expand Up @@ -1952,14 +1914,6 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
SemaRef.addSyclDeviceDecl(KernelDecl);
}

bool enterStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
return enterStruct(RD, FD, Ty);
}

bool leaveStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
return leaveStruct(RD, FD, Ty);
}

bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final {
++StructDepth;
return true;
Expand Down Expand Up @@ -2095,8 +2049,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
}

bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
addParam(FD, FieldTy);
return true;
return handleSpecialType(FD, FieldTy);
}

bool handleSyclStreamType(const CXXRecordDecl *, const CXXBaseSpecifier &,
Expand Down Expand Up @@ -2591,6 +2544,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {

const auto *RecordDecl = Ty->getAsCXXRecordDecl();
createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts);
createSpecialMethodCall(RecordDecl, FinalizeMethodName, BodyStmts);

removeFieldMemberExpr(FD, Ty);

Expand Down Expand Up @@ -2684,9 +2638,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
}

bool handleSyclStreamType(FieldDecl *FD, QualType Ty) final {
// Streams just get copied as a new init.
addSimpleFieldInit(FD, Ty);
return true;
return handleSpecialType(FD, Ty);
}

bool handleSyclStreamType(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
Expand Down Expand Up @@ -2763,31 +2715,6 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
handleSpecialType(KernelHandlerArg->getType());
}

bool enterStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
++StructDepth;
// Add a dummy init expression to catch the accessor initializers.
const auto *StreamDecl = Ty->getAsCXXRecordDecl();
CollectionInitExprs.push_back(createInitListExpr(StreamDecl));

addFieldMemberExpr(FD, Ty);
return true;
}

bool leaveStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
--StructDepth;
// Stream requires that its 'init' calls happen after its accessors init
// calls, so add them here instead.
const auto *StreamDecl = Ty->getAsCXXRecordDecl();

createSpecialMethodCall(StreamDecl, getInitMethodName(), BodyStmts);
createSpecialMethodCall(StreamDecl, FinalizeMethodName, FinalizeStmts);

removeFieldMemberExpr(FD, Ty);

CollectionInitExprs.pop_back();
return true;
}

bool enterStruct(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
++StructDepth;
addCollectionInitListExpr(Ty->getAsCXXRecordDecl());
Expand Down Expand Up @@ -3101,7 +3028,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
}

bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout);
addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream);
return true;
}

Expand Down Expand Up @@ -3133,18 +3060,6 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
SYCLIntegrationHeader::kind_specialization_constants_buffer, 0);
}

bool enterStream(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final {
++StructDepth;
CurOffset += offsetOf(FD, Ty);
return true;
}

bool leaveStream(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final {
--StructDepth;
CurOffset -= offsetOf(FD, Ty);
return true;
}

bool enterStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final {
++StructDepth;
CurOffset += offsetOf(FD, Ty);
Expand Down Expand Up @@ -3948,6 +3863,7 @@ static const char *paramKind2Str(KernelParamKind K) {
CASE(accessor);
CASE(std_layout);
CASE(sampler);
CASE(stream);
CASE(specialization_constants_buffer);
CASE(pointer);
}
Expand Down
15 changes: 14 additions & 1 deletion clang/test/CodeGenSYCL/Inputs/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ class accessor {
void __init(__attribute__((opencl_global)) dataT *Ptr, range<dimensions> AccessRange,
range<dimensions> MemRange, id<dimensions> Offset) {}
void __init_esimd(__attribute__((opencl_global)) dataT *Ptr) {}
friend class stream;
};

template <int dimensions, access::mode accessmode, access::target accesstarget>
Expand Down Expand Up @@ -411,10 +412,22 @@ class stream {
public:
stream(unsigned long BufferSize, unsigned long MaxStatementSize,
handler &CGH) {}
#ifdef __SYCL_DEVICE_ONLY__
// Default constructor for objects later initialized with __init member.
stream() = default;
#endif

void __init() {}
void __init(__attribute((opencl_global)) char *Ptr, range<1> AccessRange,
range<1> MemRange, id<1> Offset, int _FlushBufferSize) {
Acc.__init(Ptr, AccessRange, MemRange, Offset);
FlushBufferSize = _FlushBufferSize;
}

void __finalize() {}

private:
cl::sycl::accessor<char, 1, cl::sycl::access::mode::read_write> Acc;
int FlushBufferSize;
};

template <typename T>
Expand Down
16 changes: 12 additions & 4 deletions clang/test/CodeGenSYCL/stream.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o %t.ll
// RUN: FileCheck < %t.ll --enable-var-scope %s
//
// CHECK: define {{.*}}spir_kernel void @"{{.*}}StreamTester"(%"{{.*}}cl::sycl::stream"* byval(%"{{.*}}cl::sycl::stream") {{.*}}){{.*}}
// CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* {{[^,]*}} %{{[0-9]+}})
// CHECK: call spir_func void @{{.*}}__finalize{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* {{[^,]*}} %{{[0-9]+}})
//

// CHECK: %[[RANGE_TYPE:"struct.*cl::sycl::range"]]
// CHECK: %[[ID_TYPE:"struct.*cl::sycl::id"]]
// CHECK: define dso_local spir_kernel void @{{.*}}StreamTester
// CHECK-SAME: i8 addrspace(1)* [[ACC_DATA:%[a-zA-Z0-9_]+]],
// CHECK-SAME: %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+]],
// CHECK-SAME: %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+]],
// CHECK-SAME: %[[ID_TYPE]]* byval(%[[ID_TYPE]]) align 4 [[ACC_ID:%[a-zA-Z0-9_]+]],
// CHECK-SAME: i32 [[ACC_INT:%[a-zA-Z0-9_]+]])

// CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* dereferenceable_or_null(16) %{{[0-9]+}}, i8 addrspace(1)* %5, %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) {{.*}}%{{.*}}
// CHECK: call spir_func void @{{.*}}__finalizeEv{{.*}}(%{{.*}}cl::sycl::stream{{.*}}" addrspace(4)* dereferenceable_or_null(16) %{{[0-9]+}})

#include "Inputs/sycl.hpp"

Expand Down
2 changes: 2 additions & 0 deletions sycl/include/CL/sycl/accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
class stream;
namespace INTEL {
namespace gpu {
namespace detail {
Expand Down Expand Up @@ -929,6 +930,7 @@ class accessor :

private:
friend class sycl::INTEL::gpu::detail::AccessorPrivateProxy;
friend class sycl::stream;

public:
using value_type = DataT;
Expand Down
5 changes: 3 additions & 2 deletions sycl/include/CL/sycl/detail/kernel_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ enum class kernel_param_kind_t {
kind_accessor = 0,
kind_std_layout = 1, // standard layout object parameters
kind_sampler = 2,
kind_pointer = 3,
kind_specialization_constants_buffer = 4,
kind_stream = 3,
kind_pointer = 4,
kind_specialization_constants_buffer = 5,
};

// describes a kernel parameter
Expand Down
42 changes: 40 additions & 2 deletions sycl/include/CL/sycl/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,23 @@ using GlobalBufAccessorT = accessor<char, 1, cl::sycl::access::mode::read_write,
cl::sycl::access::target::global_buffer,
cl::sycl::access::placeholder::false_t>;

constexpr static access::address_space GlobalBufAS =
TargetToAS<cl::sycl::access::target::global_buffer>::AS;
using GlobalBufPtrType =
typename detail::DecoratedType<char, GlobalBufAS>::type *;
constexpr static int GlobalBufDim = 1;

using GlobalOffsetAccessorT =
accessor<unsigned, 1, cl::sycl::access::mode::atomic,
cl::sycl::access::target::global_buffer,
cl::sycl::access::placeholder::false_t>;

constexpr static access::address_space GlobalOffsetAS =
TargetToAS<cl::sycl::access::target::global_buffer>::AS;
using GlobalOffsetPtrType =
typename detail::DecoratedType<unsigned, GlobalBufAS>::type *;
constexpr static int GlobalOffsetDim = 1;

// Read first 2 bytes of flush buffer to get buffer offset.
// TODO: Should be optimized to the following:
// return *reinterpret_cast<uint16_t *>(&GlobalFlushBuf[WIOffset]);
Expand Down Expand Up @@ -729,6 +741,11 @@ inline __width_manipulator__ setw(int Width) {
/// \ingroup sycl_api
class __SYCL_EXPORT stream {
public:
#ifdef __SYCL_DEVICE_ONLY__
// Default constructor for objects later initialized with __init member.
stream() = default;
#endif

// Throws exception in case of invalid input parameters
stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH);

Expand Down Expand Up @@ -845,7 +862,27 @@ class __SYCL_EXPORT stream {
}

#ifdef __SYCL_DEVICE_ONLY__
void __init() {
void __init(detail::GlobalBufPtrType GlobalBufPtr,
range<detail::GlobalBufDim> GlobalBufAccRange,
range<detail::GlobalBufDim> GlobalBufMemRange,
id<detail::GlobalBufDim> GlobalBufId,
detail::GlobalOffsetPtrType GlobalOffsetPtr,
range<detail::GlobalOffsetDim> GlobalOffsetAccRange,
range<detail::GlobalOffsetDim> GlobalOffsetMemRange,
id<detail::GlobalOffsetDim> GlobalOffsetId,
detail::GlobalBufPtrType GlobalFlushPtr,
range<detail::GlobalBufDim> GlobalFlushAccRange,
range<detail::GlobalBufDim> GlobalFlushMemRange,
id<detail::GlobalBufDim> GlobalFlushId, size_t _FlushBufferSize) {
#ifndef __SYCL_EXPLICIT_SIMD__
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that __SYCL_EXPLICIT_SIMD__ doesn't exist anymore.

GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange,
GlobalBufId);
GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange,
GlobalOffsetMemRange, GlobalOffsetId);
GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange,
GlobalFlushMemRange, GlobalFlushId);
#endif
FlushBufferSize = _FlushBufferSize;
// Calculate offset in the flush buffer for each work item in the global
// work space. We need to avoid calling intrinsics to get global id because
// when stream is used in a single_task kernel this could cause some
Expand All @@ -871,6 +908,8 @@ class __SYCL_EXPORT stream {
}
#endif

friend class handler;

friend const stream &operator<<(const stream &, const char);
friend const stream &operator<<(const stream &, const char *);
template <typename ValueType>
Expand Down Expand Up @@ -1118,4 +1157,3 @@ template <> struct hash<cl::sycl::stream> {
}
};
} // namespace std

2 changes: 2 additions & 0 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1660,6 +1660,8 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
if (!EliminatedArgMask.empty() && EliminatedArgMask[Arg.MIndex])
continue;
switch (Arg.MType) {
case kernel_param_kind_t::kind_stream:
break;
case kernel_param_kind_t::kind_accessor: {
Requirement *Req = (Requirement *)(Arg.MPtr);
AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
Expand Down
Loading