From 46a50e80251a85340ef3327c0ffc5da2197a1abb Mon Sep 17 00:00:00 2001 From: Lee Killough Date: Wed, 17 Jul 2019 18:24:35 -0400 Subject: [PATCH 1/2] Use a similar method as in rocBLAS to determine whether to use C++ and HIP features in tensile_bfloat16.h --- Tensile/Source/tensile_bfloat16.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/Tensile/Source/tensile_bfloat16.h b/Tensile/Source/tensile_bfloat16.h index ccb1ba50f..aa8325b3c 100644 --- a/Tensile/Source/tensile_bfloat16.h +++ b/Tensile/Source/tensile_bfloat16.h @@ -1,7 +1,7 @@ /** * MIT License * - * Copyright (C) 2019 Advanced Micro Devices, Inc. All rights reserved. + * Copyright 2019 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -30,22 +30,23 @@ #ifndef _TENSILE_BFLOAT16_H_ #define _TENSILE_BFLOAT16_H_ -#ifndef __cplusplus - -#include +// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, +// we only include a minimal definition of tensile_bfloat16 +#if __cplusplus < 201103L || !defined(__HCC__) +#include typedef struct { uint16_t data; } tensile_bfloat16; -#else // __cplusplus - -#include +#else // __cplusplus < 201103L || !defined(__HCC__) -#include #include -#include +#include +#include +#include +#include #include struct tensile_bfloat16 @@ -57,7 +58,7 @@ struct tensile_bfloat16 __host__ __device__ tensile_bfloat16() {} // round upper 16 bits of IEEE float to convert to bfloat16 - explicit __host__ __device__ tensile_bfloat16(float f) : data(float_to_bfloat16(f)) {} + explicit __host__ __device__ tensile_bfloat16(float f) : data(float_to_bfloat16(f)) { } // zero extend lower 16 bits of bfloat16 to convert to IEEE float explicit __host__ __device__ operator float() const @@ -65,17 +66,17 @@ struct tensile_bfloat16 union { uint32_t int32; - float fp32; + float fp32; } u = {uint32_t(data) << 16}; return u.fp32; } - private: +private: static __host__ __device__ uint16_t float_to_bfloat16(float f) { union { - float fp32; + float fp32; uint32_t int32; } u = {f}; if(~u.int32 & 0x7f800000) @@ -191,6 +192,6 @@ inline __host__ __device__ tensile_bfloat16 abs(tensile_bfloat16 a) inline tensile_bfloat16 sin(tensile_bfloat16 a) { return tensile_bfloat16(sinf(float(a))); } inline tensile_bfloat16 cos(tensile_bfloat16 a) { return tensile_bfloat16(cosf(float(a))); } -#endif // __cplusplus +#endif // __cplusplus < 201103L || !defined(__HCC__) #endif // _TENSILE_BFLOAT16_H_ From 61bb3916cf87065f7e3f9f89f1db7cb50127410f Mon Sep 17 00:00:00 2001 From: Lee Killough Date: Sun, 1 Sep 2019 17:30:57 -0400 Subject: [PATCH 2/2] Make bfloat16 changes similar to rocBLAS PR #678 --- Tensile/Source/MathTemplates.cpp | 39 ++++++++++++++----------------- Tensile/Source/tensile_bfloat16.h | 7 +----- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/Tensile/Source/MathTemplates.cpp b/Tensile/Source/MathTemplates.cpp index d24960fe1..5268c2bb1 100644 --- a/Tensile/Source/MathTemplates.cpp +++ b/Tensile/Source/MathTemplates.cpp @@ -86,15 +86,15 @@ template<> TensileComplexDouble tensileGetOne() { #ifdef Tensile_ENABLE_HALF template<> TensileHalf tensileGetRandom() { return static_cast((rand()%7) - 3); } #endif -template<> uint32_t tensileGetRandom() { - int8_t t0 = static_cast((rand()%7) - 3); - int8_t t1 = static_cast((rand()%7) - 3); - int8_t t2 = static_cast((rand()%7) - 3); - int8_t t3 = static_cast((rand()%7) - 3); +template<> uint32_t tensileGetRandom() { + int8_t t0 = static_cast((rand()%7) - 3); + int8_t t1 = static_cast((rand()%7) - 3); + int8_t t2 = static_cast((rand()%7) - 3); + int8_t t3 = static_cast((rand()%7) - 3); int8_t t1x4[4] = {t0, t1, t2, t3}; - uint32_t tmp; + uint32_t tmp; memcpy(&tmp, t1x4, sizeof(uint32_t)); - return tmp; + return tmp; } template<> int32_t tensileGetRandom() { return static_cast((rand()%7) - 3); } template<> float tensileGetRandom() { return static_cast((rand()%201) - 100); } @@ -142,20 +142,20 @@ template<> TensileComplexDouble tensileGetTypeForInt( size #ifdef Tensile_ENABLE_HALF template<> TensileHalf tensileGetTrig(int i) { return static_cast(sin(i)); } #endif -template<> uint32_t tensileGetTrig(int i) { - int8_t t0 = static_cast((rand()%7) - 3); - int8_t t1 = static_cast((rand()%7) - 3); - int8_t t2 = static_cast((rand()%7) - 3); - int8_t t3 = static_cast((rand()%7) - 3); +template<> uint32_t tensileGetTrig(int i) { + int8_t t0 = static_cast((rand()%7) - 3); + int8_t t1 = static_cast((rand()%7) - 3); + int8_t t2 = static_cast((rand()%7) - 3); + int8_t t3 = static_cast((rand()%7) - 3); int8_t t1x4[4] = {t0, t1, t2, t3}; - uint32_t tmp; + uint32_t tmp; memcpy(&tmp, t1x4, sizeof(uint32_t)); - return tmp; + return tmp; } -template<> int32_t tensileGetTrig(int i) { return static_cast((rand()%7) - 3); } -template<> float tensileGetTrig(int i) { return static_cast(sin(i)); } -template<> tensile_bfloat16 tensileGetTrig(int i) { return sin(static_cast(i)); } -template<> double tensileGetTrig(int i) { return static_cast(sin(i)); } +template<> int32_t tensileGetTrig(int i) { return rand() % 7 - 3; } +template<> float tensileGetTrig(int i) { return sin(i); } +template<> tensile_bfloat16 tensileGetTrig(int i) { return tensile_bfloat16(sinf(i)); } +template<> double tensileGetTrig(int i) { return sin(i); } template<> TensileComplexFloat tensileGetTrig(int i) { TensileComplexFloat r; TENSILEREAL(r) = tensileGetTrig(i); @@ -531,6 +531,3 @@ template<> std::string tensileToString(TensileHalf v){ #endif template<> std::string tensileToString(tensile_bfloat16 v){ return tensileToString(static_cast(v)); } - - - diff --git a/Tensile/Source/tensile_bfloat16.h b/Tensile/Source/tensile_bfloat16.h index c806b6884..3bef81a58 100644 --- a/Tensile/Source/tensile_bfloat16.h +++ b/Tensile/Source/tensile_bfloat16.h @@ -61,7 +61,7 @@ struct tensile_bfloat16 explicit __host__ __device__ tensile_bfloat16(float f) : data(float_to_bfloat16(f)) { } // zero extend lower 16 bits of bfloat16 to convert to IEEE float - explicit __host__ __device__ operator float() const + __host__ __device__ operator float() const { union { @@ -184,11 +184,6 @@ inline __host__ __device__ tensile_bfloat16 operator--(tensile_bfloat16& a, int) inline __host__ __device__ bool isinf(tensile_bfloat16 a) { return !(~a.data & 0x7f80) && !(a.data & 0x7f); } inline __host__ __device__ bool isnan(tensile_bfloat16 a) { return !(~a.data & 0x7f80) && +(a.data & 0x7f); } inline __host__ __device__ bool iszero(tensile_bfloat16 a) { return !(a.data & 0x7fff); } -inline __host__ __device__ tensile_bfloat16 abs(tensile_bfloat16 a) -{ - a.data &= 0x7fff; - return a; -} inline tensile_bfloat16 sin(tensile_bfloat16 a) { return tensile_bfloat16(sinf(float(a))); } inline tensile_bfloat16 cos(tensile_bfloat16 a) { return tensile_bfloat16(cosf(float(a))); }