Skip to content

Commit

Permalink
[flang][OpenMP] Make FlangRuntime offload use builtins vs. libc
Browse files Browse the repository at this point in the history
    - Allows offload regions that need _FortranAAssign on device
      to link without GPU libc support
    - Fixes unresolved symbols: strlen, memcpy, memset, memmove
      by using builtins
    - Also moved a couple of RT_OFFLOAD_VAR_GROUP_BEGIN markers so
      that they cover additional declarations used in offload
      regions (flagged by build warnings)
    - Other call sites still use std::mem* routines and could
      use Fortran::runtime::mem* instead; those changes are
      deferred until this patch has been more fully exercised
  • Loading branch information
dpalermo committed Oct 9, 2024
1 parent 31ffc5f commit c216291
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 21 deletions.
2 changes: 1 addition & 1 deletion flang/include/flang/Decimal/binary-floating-point.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ enum FortranRounding {

template <int BINARY_PRECISION> class BinaryFloatingPointNumber {
public:
RT_OFFLOAD_VAR_GROUP_BEGIN
static constexpr common::RealCharacteristics realChars{BINARY_PRECISION};
static constexpr int binaryPrecision{BINARY_PRECISION};
static constexpr int bits{realChars.bits};
Expand All @@ -47,7 +48,6 @@ template <int BINARY_PRECISION> class BinaryFloatingPointNumber {

using RawType = common::HostUnsignedIntType<bits>;
static_assert(CHAR_BIT * sizeof(RawType) >= bits);
RT_OFFLOAD_VAR_GROUP_BEGIN
static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};

constexpr RT_API_ATTRS BinaryFloatingPointNumber() {} // zero
Expand Down
4 changes: 4 additions & 0 deletions flang/include/flang/Runtime/allocator-registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include <cstdlib>
#include <vector>

RT_OFFLOAD_VAR_GROUP_BEGIN

static constexpr unsigned kDefaultAllocator = 0;

// Allocator used for CUF
Expand All @@ -21,6 +23,8 @@ static constexpr unsigned kDeviceAllocatorPos = 2;
static constexpr unsigned kManagedAllocatorPos = 3;
static constexpr unsigned kUnifiedAllocatorPos = 4;

RT_OFFLOAD_VAR_GROUP_END

#define MAX_ALLOCATOR 7 // 3 bits are reserved in the descriptor.

namespace Fortran::runtime {
Expand Down
75 changes: 72 additions & 3 deletions flang/include/flang/Runtime/freestanding-tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,26 @@
#define STD_TOUPPER_UNSUPPORTED 1
#endif

// OpenMP offload / no-host builds: the runtime has to link without GPU libc
// support, so flag the C library routines below as unavailable.
// For memset/memcpy/memmove the *_BUILTIN macros are also set; the #if chains
// later in this header test *_BUILTIN before *_UNSUPPORTED, so the wrappers
// around the compiler's __builtin_* versions are chosen over the hand-written
// fallback loops.
#if defined(OMP_OFFLOAD_BUILD) || defined(OMP_NOHOST_BUILD)
// Debug aid (disabled): report at compile time that this branch is active.
// #pragma message "OMP_OFFLOAD_BUILD or OMP_NOHOST_BUILD is defined"
#define STD_LIBC_UNSUPPORTED 1
#define STD_FILL_N_UNSUPPORTED 1
#define STD_MEMSET_BUILTIN 1
#define STD_MEMSET_UNSUPPORTED 1
#define STD_MEMCPY_BUILTIN 1
#define STD_MEMCPY_UNSUPPORTED 1
#define STD_MEMMOVE_BUILTIN 1
#define STD_MEMMOVE_UNSUPPORTED 1
// The strlen builtin is deliberately left disabled: it still produces a
// reference to strlen, so the *_UNSUPPORTED fallback is used instead.
// #define STD_STRLEN_BUILTIN 1 // still resolves to strlen
#define STD_STRLEN_UNSUPPORTED 1
#define STD_MEMCMP_UNSUPPORTED 1
#define STD_REALLOC_UNSUPPORTED 1
#define STD_MEMCHR_UNSUPPORTED 1
#define STD_STRCPY_UNSUPPORTED 1
#define STD_STRCMP_UNSUPPORTED 1
#define STD_TOUPPER_UNSUPPORTED 1
#endif

namespace Fortran::runtime {

#if STD_FILL_N_UNSUPPORTED
Expand All @@ -79,7 +99,52 @@ fill_n(A *start, std::size_t count, const B &value) {
using std::fill_n;
#endif // !STD_FILL_N_UNSUPPORTED

#if STD_MEMMOVE_UNSUPPORTED
#if STD_MEMSET_BUILTIN
// memset() variant used when STD_MEMSET_BUILTIN is set: fills `count` bytes
// starting at `dest` with `value` by forwarding to the compiler's
// __builtin_memset. The builtin's result is discarded; nothing is returned.
static inline RT_API_ATTRS void memset(
    void *dest, uint8_t value, std::size_t count) {
  static_cast<void>(__builtin_memset(dest, static_cast<int>(value), count));
}
#elif STD_MEMSET_UNSUPPORTED
// Hand-written memset() used when std::memset is flagged as unsupported and
// no builtin variant was selected: stores `value` into each of the `count`
// bytes starting at `dest`, in ascending address order. Returns nothing.
static inline RT_API_ATTRS void memset(
    void *dest, uint8_t value, std::size_t count) {
  auto *out{reinterpret_cast<char *>(dest)};
  for (std::size_t i{0}; i < count; ++i) {
    out[i] = value;
  }
}
#else
using std::memset;
#endif

#if STD_MEMCPY_BUILTIN
// memcpy() variant used when STD_MEMCPY_BUILTIN is set: copies `count` bytes
// from `src` to `dest` by forwarding to the compiler's __builtin_memcpy.
// The builtin's result is discarded; nothing is returned.
static inline RT_API_ATTRS void memcpy(
    void *dest, const void *src, std::size_t count) {
  static_cast<void>(__builtin_memcpy(dest, src, count));
}
#elif STD_MEMCPY_UNSUPPORTED
// Hand-written memcpy() used when std::memcpy is flagged as unsupported and
// no builtin variant was selected. Copies `count` bytes from `src` to `dest`
// one byte at a time in ascending address order; does nothing when both
// pointers refer to the same address. Returns nothing.
static inline RT_API_ATTRS void memcpy(
    void *dest, const void *src, std::size_t count) {
  auto *out{reinterpret_cast<char *>(dest)};
  const auto *in{reinterpret_cast<const char *>(src)};
  if (out != in) {
    for (std::size_t i{0}; i < count; ++i) {
      out[i] = in[i];
    }
  }
}
#else
using std::memcpy;
#endif

#if STD_MEMMOVE_BUILTIN
// memmove() variant used when STD_MEMMOVE_BUILTIN is set: moves `count` bytes
// from `src` to `dest` by forwarding to the compiler's __builtin_memmove.
// The builtin's result is discarded; nothing is returned.
static inline RT_API_ATTRS void memmove(
    void *dest, const void *src, std::size_t count) {
  static_cast<void>(__builtin_memmove(dest, src, count));
}
#elif STD_MEMMOVE_UNSUPPORTED
// Provides alternative implementation for std::memmove(), if
// it is not supported.
static inline RT_API_ATTRS void memmove(
Expand All @@ -91,7 +156,7 @@ static inline RT_API_ATTRS void memmove(
return;
}
if (to + count <= from || from + count <= to) {
std::memcpy(dest, src, count);
memcpy(dest, src, count);
} else if (to < from) {
while (count--) {
*to++ = *from++;
Expand All @@ -108,7 +173,11 @@ static inline RT_API_ATTRS void memmove(
using std::memmove;
#endif // !STD_MEMMOVE_UNSUPPORTED

#if STD_STRLEN_UNSUPPORTED
#if STD_STRLEN_BUILTIN
// strlen() variant used when STD_STRLEN_BUILTIN is set: returns the value
// computed by the compiler's __builtin_strlen for `str`.
static inline RT_API_ATTRS std::size_t strlen(const char *str) {
  const std::size_t length{__builtin_strlen(str)};
  return length;
}
#elif STD_STRLEN_UNSUPPORTED
// Provides alternative implementation for std::strlen(), if
// it is not supported.
static inline RT_API_ATTRS std::size_t strlen(const char *str) {
Expand Down
4 changes: 2 additions & 2 deletions flang/runtime/assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,15 +274,15 @@ RT_API_ATTRS static void Assign(
if (MayAlias(to, from)) {
if (mustDeallocateLHS) {
deferDeallocation = &deferredDeallocStatDesc.descriptor();
std::memcpy(deferDeallocation, &to, to.SizeInBytes());
Fortran::runtime::memcpy(deferDeallocation, &to, to.SizeInBytes());
to.set_base_addr(nullptr);
} else if (!isSimpleMemmove()) {
// Handle LHS/RHS aliasing by copying RHS into a temp, then
// recursively assigning from that temp.
auto descBytes{from.SizeInBytes()};
StaticDescriptor<maxRank, true, 16> staticDesc;
Descriptor &newFrom{staticDesc.descriptor()};
std::memcpy(&newFrom, &from, descBytes);
Fortran::runtime::memcpy(&newFrom, &from, descBytes);
// Pretend the temporary descriptor is for an ALLOCATABLE
// entity, otherwise, the Deallocate() below will not
// free the descriptor memory.
Expand Down
3 changes: 2 additions & 1 deletion flang/runtime/derived.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
std::size_t bytes{comp.SizeInBytes(instance)};
for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) {
char *ptr{instance.ElementComponent<char>(at, comp.offset())};
std::memcpy(ptr, init, bytes);
// std::memcpy(ptr, init, bytes);
Fortran::runtime::memcpy(ptr, init, bytes);
}
} else if (comp.genre() == typeInfo::Component::Genre::Pointer) {
// Data pointers without explicit initialization are established
Expand Down
2 changes: 1 addition & 1 deletion flang/runtime/descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ RT_OFFLOAD_API_GROUP_BEGIN
// Copy constructor: delegates to the copy-assignment operator.
RT_API_ATTRS Descriptor::Descriptor(const Descriptor &that) { *this = that; }

// Copy assignment: overwrites *this with a raw byte copy of `that`, using
// that.SizeInBytes() as the number of bytes to copy, and returns *this.
RT_API_ATTRS Descriptor &Descriptor::operator=(const Descriptor &that) {
std::memcpy(this, &that, that.SizeInBytes());
Fortran::runtime::memcpy(this, &that, that.SizeInBytes());
return *this;
}

Expand Down
6 changes: 3 additions & 3 deletions flang/runtime/stat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ RT_API_ATTRS int ToErrmsg(const Descriptor *errmsg, int stat) {
std::size_t bufferLength{errmsg->ElementBytes()};
std::size_t msgLength{Fortran::runtime::strlen(msg)};
if (msgLength >= bufferLength) {
std::memcpy(buffer, msg, bufferLength);
Fortran::runtime::memcpy(buffer, msg, bufferLength);
} else {
std::memcpy(buffer, msg, msgLength);
std::memset(buffer + msgLength, ' ', bufferLength - msgLength);
Fortran::runtime::memcpy(buffer, msg, msgLength);
Fortran::runtime::memset(buffer + msgLength, ' ', bufferLength - msgLength);
}
}
}
Expand Down
20 changes: 10 additions & 10 deletions flang/runtime/tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ RT_API_ATTRS OwningPtr<char> SaveDefaultCharacter(
const char *s, std::size_t length, const Terminator &terminator) {
if (s) {
auto *p{static_cast<char *>(AllocateMemoryOrCrash(terminator, length + 1))};
std::memcpy(p, s, length);
Fortran::runtime::memcpy(p, s, length);
p[length] = '\0';
return OwningPtr<char>{p};
} else {
Expand Down Expand Up @@ -75,10 +75,10 @@ RT_API_ATTRS void ToFortranDefaultCharacter(
char *to, std::size_t toLength, const char *from) {
std::size_t len{Fortran::runtime::strlen(from)};
if (len < toLength) {
std::memcpy(to, from, len);
std::memset(to + len, ' ', toLength - len);
Fortran::runtime::memcpy(to, from, len);
Fortran::runtime::memset(to + len, ' ', toLength - len);
} else {
std::memcpy(to, from, toLength);
Fortran::runtime::memcpy(to, from, toLength);
}
}

Expand Down Expand Up @@ -122,7 +122,7 @@ RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous(
std::size_t elementBytes{to.ElementBytes()};
for (std::size_t n{to.Elements()}; n-- > 0;
to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
std::memcpy(
Fortran::runtime::memcpy(
to.Element<char>(toAt), from.Element<char>(fromAt), elementBytes);
}
}
Expand All @@ -135,7 +135,7 @@ RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous(
std::size_t elementBytes{to.ElementBytes()};
for (std::size_t n{to.Elements()}; n-- > 0;
toAt += elementBytes, from.IncrementSubscripts(fromAt)) {
std::memcpy(toAt, from.Element<char>(fromAt), elementBytes);
Fortran::runtime::memcpy(toAt, from.Element<char>(fromAt), elementBytes);
}
}

Expand All @@ -147,15 +147,15 @@ RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous(
std::size_t elementBytes{to.ElementBytes()};
for (std::size_t n{to.Elements()}; n-- > 0;
to.IncrementSubscripts(toAt), fromAt += elementBytes) {
std::memcpy(to.Element<char>(toAt), fromAt, elementBytes);
Fortran::runtime::memcpy(to.Element<char>(toAt), fromAt, elementBytes);
}
}

RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from,
bool toIsContiguous, bool fromIsContiguous) {
if (toIsContiguous) {
if (fromIsContiguous) {
std::memcpy(to.OffsetElement(), from.OffsetElement(),
Fortran::runtime::memcpy(to.OffsetElement(), from.OffsetElement(),
to.Elements() * to.ElementBytes());
} else {
ShallowCopyDiscontiguousToContiguous(to, from);
Expand All @@ -177,7 +177,7 @@ RT_API_ATTRS char *EnsureNullTerminated(
char *str, std::size_t length, Terminator &terminator) {
if (runtime::memchr(str, '\0', length) == nullptr) {
char *newCmd{(char *)AllocateMemoryOrCrash(terminator, length + 1)};
std::memcpy(newCmd, str, length);
Fortran::runtime::memcpy(newCmd, str, length);
newCmd[length] = '\0';
return newCmd;
} else {
Expand Down Expand Up @@ -209,7 +209,7 @@ RT_API_ATTRS std::int32_t CopyCharsToDescriptor(const Descriptor &value,
return ToErrmsg(errmsg, StatValueTooShort);
}

std::memcpy(value.OffsetElement(offset), rawValue, toCopy);
Fortran::runtime::memcpy(value.OffsetElement(offset), rawValue, toCopy);

if (static_cast<std::int64_t>(rawValueLength) > toCopy) {
return ToErrmsg(errmsg, StatValueTooShort);
Expand Down

0 comments on commit c216291

Please sign in to comment.