Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,9 @@ using _pi_offload_entry = _pi_offload_entry_struct *;
// A type of a binary image property.
typedef enum {
PI_PROPERTY_TYPE_UNKNOWN,
PI_PROPERTY_TYPE_UINT32, // 32-bit integer
PI_PROPERTY_TYPE_STRING // null-terminated string
PI_PROPERTY_TYPE_UINT32, // 32-bit integer
PI_PROPERTY_TYPE_BYTE_ARRAY, // byte array
PI_PROPERTY_TYPE_STRING // null-terminated string
} pi_property_type;

// Device binary image property.
Expand Down Expand Up @@ -652,6 +653,8 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4;
#define PI_PROPERTY_SET_SPEC_CONST_MAP "SYCL/specialization constants"
/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h
#define PI_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask"
/// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h
#define PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt"

/// This struct is a record of the device binary information. If the Kind field
/// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec
Expand Down
6 changes: 6 additions & 0 deletions sycl/include/CL/sycl/detail/pi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cassert>
#include <sstream>
#include <string>
#include <vector>

#ifdef XPTI_ENABLE_INSTRUMENTATION
// Forward declarations
Expand Down Expand Up @@ -204,6 +205,7 @@ class DeviceBinaryProperty {
: Prop(Prop) {}

pi_uint32 asUint32() const;
std::vector<unsigned char> asByteArray() const;
const char *asCString() const;

protected:
Expand Down Expand Up @@ -300,6 +302,9 @@ class DeviceBinaryImage {
/// value is 32-bit unsigned integer ID.
const PropertyRange &getSpecConstants() const { return SpecConstIDMap; }
const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; }
const PropertyRange &getKernelParamOptInfo() const {
return KernelParamOptInfo;
}
virtual ~DeviceBinaryImage() {}

protected:
Expand All @@ -310,6 +315,7 @@ class DeviceBinaryImage {
pi::PiDeviceBinaryType Format = PI_DEVICE_BINARY_TYPE_NONE;
DeviceBinaryImage::PropertyRange SpecConstIDMap;
DeviceBinaryImage::PropertyRange DeviceLibReqMask;
DeviceBinaryImage::PropertyRange KernelParamOptInfo;
};

/// Tries to determine the device binary image foramat. Returns
Expand Down
31 changes: 23 additions & 8 deletions sycl/source/detail/pi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,9 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) {
case PI_PROPERTY_TYPE_UINT32:
Out << "[UINT32] ";
break;
case PI_PROPERTY_TYPE_BYTE_ARRAY:
Out << "[Byte array] ";
break;
case PI_PROPERTY_TYPE_STRING:
Out << "[String] ";
break;
Expand All @@ -425,16 +428,20 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) {
}
Out << P.Prop->Name << "=";

switch (P.Prop->Type) {
case PI_PROPERTY_TYPE_UINT32:
if (P.Prop->Type == PI_PROPERTY_TYPE_UINT32) {
Out << P.asUint32();
break;
case PI_PROPERTY_TYPE_STRING:
} else if (P.Prop->Type == PI_PROPERTY_TYPE_BYTE_ARRAY) {
std::vector<unsigned char> ByteArray = P.asByteArray();
std::ios_base::fmtflags FlagsBackup = Out.flags();
Out << std::hex;
for (auto Byte : ByteArray) {
Out << "0x" << static_cast<unsigned int>(Byte) << " ";
}
Out.flags(FlagsBackup);
} else if (P.Prop->Type == PI_PROPERTY_TYPE_STRING) {
Out << P.asCString();
break;
default:
assert("unsupported property");
return Out;
} else {
assert(false && "Unsupported property");
}
return Out;
}
Expand Down Expand Up @@ -491,6 +498,13 @@ pi_uint32 DeviceBinaryProperty::asUint32() const {
return sycl::detail::pi::asUint32(&Prop->ValSize);
}

std::vector<unsigned char> DeviceBinaryProperty::asByteArray() const {
assert(Prop->Type == PI_PROPERTY_TYPE_BYTE_ARRAY && "property type mismatch");
assert(Prop->ValSize > 0 && "property size mismatch");
const auto *Data = pi::cast<const unsigned char *>(Prop->ValAddr);
return {Data, Data + Prop->ValSize};
}

const char *DeviceBinaryProperty::asCString() const {
assert(Prop->Type == PI_PROPERTY_TYPE_STRING && "property type mismatch");
assert(Prop->ValSize > 0 && "property size mismatch");
Expand Down Expand Up @@ -550,6 +564,7 @@ void DeviceBinaryImage::init(pi_device_binary Bin) {

SpecConstIDMap.init(Bin, PI_PROPERTY_SET_SPEC_CONST_MAP);
DeviceLibReqMask.init(Bin, PI_PROPERTY_SET_DEVICELIB_REQ_MASK);
KernelParamOptInfo.init(Bin, PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO);
}

} // namespace pi
Expand Down
74 changes: 74 additions & 0 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,10 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M,
Img.getLinkOptions(), PiDevices,
ContextImpl->getCachedLibPrograms(), DeviceLibReqMask);

{
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
NativePrograms[BuiltProgram.get()] = &Img;
}
return BuiltProgram.release();
};

Expand Down Expand Up @@ -851,6 +855,19 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
return Program;
}

static ProgramManager::KernelArgMask
createKernelArgMask(const std::vector<unsigned char> &Bytes) {
int SizeInBits = Bytes[0];

ProgramManager::KernelArgMask Result;
for (int I = SizeInBits - 1; I >= 0; --I) {
unsigned char Byte = Bytes[Bytes.size() - 1 - (I / CHAR_BIT)];
Result.push_back(Byte & (1 << (I % CHAR_BIT)));
}

return Result;
}

void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());

Expand All @@ -860,6 +877,17 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
const _pi_offload_entry EntriesB = RawImg->EntriesBegin;
const _pi_offload_entry EntriesE = RawImg->EntriesEnd;
auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);

// Fill the kernel argument mask map
const pi::DeviceBinaryImage::PropertyRange &KPOIRange =
Img->getKernelParamOptInfo();
if (KPOIRange.isAvailable()) {
KernelNameToArgMaskMap &ArgMaskMap =
m_EliminatedKernelArgMasks[Img.get()];
for (const auto &Info : KPOIRange)
ArgMaskMap[Info->Name] =
createKernelArgMask(pi::DeviceBinaryProperty(Info).asByteArray());
}
// Use the entry information if it's available
if (EntriesB != EntriesE) {
// The kernel sets for any pair of images are either disjoint or
Expand Down Expand Up @@ -1018,6 +1046,52 @@ uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) {
return 0xFFFFFFFF;
}

ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask(
OSModuleHandle M, const context &Context, pi::PiProgram NativePrg,
const string_class &KernelName, bool KnownProgram) {
if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
return {};

{
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
auto ImgIt = NativePrograms.find(NativePrg);
if (ImgIt != NativePrograms.end()) {
auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
if (MapIt != m_EliminatedKernelArgMasks.end())
return MapIt->second[KernelName];
return {};
}
}

if (KnownProgram)
throw runtime_error("Program is not associated with a binary image",
PI_INVALID_VALUE);

// If not sure whether the program was built with one of the images, try
// finding the binary.
// TODO this can backfire in some extreme edge cases where there's a kernel
// name collision between our binaries and user-created native programs.
KernelSetId KSId;
try {
KSId = getKernelSetId(M, KernelName);
} catch (sycl::runtime_error &e) {
// If the kernel name wasn't found, assume that the program wasn't created
// from one of our device binary images.
if (e.get_cl_code() == PI_INVALID_KERNEL_NAME)
return {};
std::rethrow_exception(std::current_exception());
}
RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context);
{
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
NativePrograms[NativePrg] = &Img;
}
auto MapIt = m_EliminatedKernelArgMasks.find(&Img);
if (MapIt != m_EliminatedKernelArgMasks.end())
return MapIt->second[KernelName];
return {};
}

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
Expand Down
28 changes: 28 additions & 0 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ enum class DeviceLibExt : std::uint32_t {
// that is necessary for no interoperability cases with lambda.
class ProgramManager {
public:
using KernelArgMask = std::vector<bool>;

// Returns the single instance of the program manager for the entire
// process. Can only be called after staticInit is done.
static ProgramManager &getInstance();
Expand Down Expand Up @@ -110,6 +112,22 @@ class ProgramManager {
const RTDeviceBinaryImage *Img = nullptr);
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);

/// Returns the mask for eliminated kernel arguments for the requested kernel
/// within the native program.
/// \param M identifies the OS module the kernel comes from (multiple OS
/// modules may have kernels with the same name).
/// \param Context the context associated with the kernel.
/// \param NativePrg the PI program associated with the kernel.
/// \param KernelName the name of the kernel.
/// \param KnownProgram indicates whether the PI program is guaranteed to
/// be known to program manager (built with its API) or not (not
/// cacheable or constructed with interoperability).
KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M,
const context &Context,
pi::PiProgram NativePrg,
const string_class &KernelName,
bool KnownProgram);

private:
ProgramManager();
~ProgramManager() = default;
Expand Down Expand Up @@ -175,6 +193,8 @@ class ProgramManager {
// - knowing which specialization constants are used in the program and
// injecting their current values before compiling the SPIRV; the binary
// image object has info about all spec constants used in the module
// - finding kernel argument masks for kernels associated with each
// pi_program
// NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
// referenced from outside SYCL runtime and RTDeviceBinaryImage object
// lifetime matches program manager's one.
Expand All @@ -186,6 +206,14 @@ class ProgramManager {

/// Protects NativePrograms that can be changed by class' methods.
std::mutex MNativeProgramsMutex;

using KernelNameToArgMaskMap =
std::unordered_map<string_class, KernelArgMask>;
/// Maps binary image and kernel name pairs to kernel argument masks which
/// specify which arguments were eliminated during device code optimization.
std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
m_EliminatedKernelArgMasks;

/// True iff a SPIRV file has been specified with an environment variable
bool m_UseSpvFile = false;
};
Expand Down
44 changes: 35 additions & 9 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1647,9 +1647,19 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) {

pi_result ExecCGCommand::SetKernelParamsAndLaunch(
CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc,
std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event) {
std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
ProgramManager::KernelArgMask EliminatedArgMask) {
assert(EliminatedArgMask.empty() ||
EliminatedArgMask.size() == ExecKernel->MArgs.size());
vector_class<ArgDesc> &Args = ExecKernel->MArgs;
std::sort(Args.begin(), Args.end(), [](const ArgDesc &A, const ArgDesc &B) {
return A.MIndex < B.MIndex;
});
int NextTrueIndex = 0;
const detail::plugin &Plugin = MQueue->getPlugin();
for (ArgDesc &Arg : ExecKernel->MArgs) {
if (!EliminatedArgMask.empty() && EliminatedArgMask[Arg.MIndex])
continue;
switch (Arg.MType) {
case kernel_param_kind_t::kind_accessor: {
Requirement *Req = (Requirement *)(Arg.MPtr);
Expand All @@ -1658,33 +1668,34 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
? (RT::PiMem)AllocaCmd->ESIMDExt.MWrapperImage
: (RT::PiMem)AllocaCmd->getMemAllocation();
if (Plugin.getBackend() == backend::opencl) {
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex,
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, NextTrueIndex,
sizeof(RT::PiMem), &MemArg);
} else {
Plugin.call<PiApiKind::piextKernelSetArgMemObj>(Kernel, Arg.MIndex,
Plugin.call<PiApiKind::piextKernelSetArgMemObj>(Kernel, NextTrueIndex,
&MemArg);
}
break;
}
case kernel_param_kind_t::kind_std_layout: {
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex, Arg.MSize,
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, NextTrueIndex, Arg.MSize,
Arg.MPtr);
break;
}
case kernel_param_kind_t::kind_sampler: {
sampler *SamplerPtr = (sampler *)Arg.MPtr;
RT::PiSampler Sampler = detail::getSyclObjImpl(*SamplerPtr)
->getOrCreateSampler(MQueue->get_context());
Plugin.call<PiApiKind::piextKernelSetArgSampler>(Kernel, Arg.MIndex,
Plugin.call<PiApiKind::piextKernelSetArgSampler>(Kernel, NextTrueIndex,
&Sampler);
break;
}
case kernel_param_kind_t::kind_pointer: {
Plugin.call<PiApiKind::piextKernelSetArgPointer>(Kernel, Arg.MIndex,
Plugin.call<PiApiKind::piextKernelSetArgPointer>(Kernel, NextTrueIndex,
Arg.MSize, Arg.MPtr);
break;
}
}
++NextTrueIndex;
}

adjustNDRangePerKernel(NDRDesc, Kernel,
Expand Down Expand Up @@ -1883,6 +1894,8 @@ cl_int ExecCGCommand::enqueueImp() {
sycl::context Context = MQueue->get_context();
RT::PiKernel Kernel = nullptr;
std::mutex *KernelMutex = nullptr;
RT::PiProgram Program = nullptr;
bool KnownProgram = true;

if (nullptr != ExecKernel->MSyclKernel) {
assert(ExecKernel->MSyclKernel->get_info<info::kernel::context>() ==
Expand All @@ -1891,6 +1904,7 @@ cl_int ExecCGCommand::enqueueImp() {

auto SyclProg = detail::getSyclObjImpl(
ExecKernel->MSyclKernel->get_info<info::kernel::program>());
Program = SyclProg->getHandleRef();
if (SyclProg->is_cacheable()) {
RT::PiKernel FoundKernel = nullptr;
std::tie(FoundKernel, KernelMutex) =
Expand All @@ -1899,23 +1913,35 @@ cl_int ExecCGCommand::enqueueImp() {
ExecKernel->MSyclKernel->get_info<info::kernel::context>(),
ExecKernel->MKernelName, SyclProg.get());
assert(FoundKernel == Kernel);
}
} else
KnownProgram = false;
} else {
std::tie(Kernel, KernelMutex) =
detail::ProgramManager::getInstance().getOrCreateKernel(
ExecKernel->MOSModuleHandle, Context, ExecKernel->MKernelName,
nullptr);
MQueue->getPlugin().call<PiApiKind::piKernelGetInfo>(
Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program,
nullptr);
}

pi_result Error = PI_SUCCESS;
ProgramManager::KernelArgMask EliminatedArgMask;
if (nullptr == ExecKernel->MSyclKernel ||
!ExecKernel->MSyclKernel->isCreatedFromSource()) {
EliminatedArgMask =
detail::ProgramManager::getInstance().getEliminatedKernelArgMask(
ExecKernel->MOSModuleHandle, Context, Program,
ExecKernel->MKernelName, KnownProgram);
}
if (KernelMutex != nullptr) {
// For cacheable kernels, we use per-kernel mutex
std::lock_guard<std::mutex> Lock(*KernelMutex);
Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents,
Event);
Event, EliminatedArgMask);
} else {
Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents,
Event);
Event, EliminatedArgMask);
}

if (PI_SUCCESS != Error) {
Expand Down
Loading