-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFloat16m7e8s1.h
75 lines (63 loc) · 4.05 KB
/
Float16m7e8s1.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
//-----------------------------------------------------------------------------
//
// The typical 'half' float16 data type (IEEE 754-2008) uses the following bit
// allocation: mantissa:10 exponent:5 sign:1.
// https://en.wikipedia.org/wiki/Half-precision_floating-point_format
//
// An alternate float16 is essentially float32 (IEEE 754-2008) with the lowest
// 16 of 23 mantissa bits chopped off: mantissa:7 exponent:8 sign:1
// https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
//
//-----------------------------------------------------------------------------
#pragma once
#include <cstdint>
#include <cstring>
#if 0 // TODO: Enable after adding test cases.
using float16m7e8s1_t = FloatNumber<uint16_t, 7, 8, true, true, true, true>;
#else
// 16-bit float stored as the upper 16 bits of a 32-bit float's bit pattern
// (sign:1, exponent:8, mantissa:7).
//
// - Converting from float keeps the top 16 bits and drops the low 16 mantissa
//   bits (truncation, no rounding).
// - Converting to float places the stored bits in the top half of the 32-bit
//   pattern and zero-fills the low half.
// - The default constructor is defaulted, so a default-initialized object
//   leaves `value` uninitialized.
//
// Bit patterns are transferred with std::memcpy rather than by casting a
// float reference to an integer reference, which violates the strict-aliasing
// rules and is undefined behavior.
struct float16m7e8s1_t
{
    float16m7e8s1_t() = default;
    float16m7e8s1_t(const float16m7e8s1_t&) = default;
    float16m7e8s1_t(float16m7e8s1_t&&) = default;

    // Implicit conversion from float (truncating).
    float16m7e8s1_t(float floatValue) noexcept
    :   value(TruncateFloatBits(floatValue))
    {
    }

    float16m7e8s1_t& operator =(const float16m7e8s1_t&) = default;

    // Assignment from float (truncating). Returns *this.
    float16m7e8s1_t& operator =(float floatValue) noexcept
    {
        value = TruncateFloatBits(floatValue);
        return *this;
    }

    // Implicit conversion back to float.
    operator float() const noexcept
    {
        // Widen before shifting: a uint16_t operand would be promoted to a
        // signed int, and shifting a set top bit into the sign bit of an int
        // is not portable.
        const uint32_t floatBits = static_cast<uint32_t>(value) << 16;
        float floatValue;
        std::memcpy(&floatValue, &floatBits, sizeof(floatValue));
        return floatValue;
    }

    // Raw storage: the upper 16 bits of the equivalent float's bit pattern.
    uint16_t value;

private:
    // Returns the upper 16 bits of floatValue's 32-bit representation.
    static uint16_t TruncateFloatBits(float floatValue) noexcept
    {
        static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide.");
        uint32_t floatBits;
        std::memcpy(&floatBits, &floatValue, sizeof(floatBits));
        return static_cast<uint16_t>(floatBits >> 16);
    }
};
#endif
// Binary arithmetic on two float16m7e8s1_t operands. Both operands are
// converted to float, the operation is carried out in float, and the float
// result is converted back to float16m7e8s1_t by the return.

// Sum of a and b.
inline float16m7e8s1_t operator +(float16m7e8s1_t a, float16m7e8s1_t b) noexcept
{
    const float result = static_cast<float>(a) + static_cast<float>(b);
    return result;
}

// Difference a - b.
inline float16m7e8s1_t operator -(float16m7e8s1_t a, float16m7e8s1_t b) noexcept
{
    const float result = static_cast<float>(a) - static_cast<float>(b);
    return result;
}

// Product of a and b.
inline float16m7e8s1_t operator *(float16m7e8s1_t a, float16m7e8s1_t b) noexcept
{
    const float result = static_cast<float>(a) * static_cast<float>(b);
    return result;
}

// Quotient a / b.
inline float16m7e8s1_t operator /(float16m7e8s1_t a, float16m7e8s1_t b) noexcept
{
    const float result = static_cast<float>(a) / static_cast<float>(b);
    return result;
}
// Binary arithmetic with a double on the right-hand side. The double is
// narrowed to float first, so the operation itself is performed in float (not
// double); the float result is converted to float16m7e8s1_t by the return.

// Sum of a and b.
inline float16m7e8s1_t operator +(float16m7e8s1_t a, double b) noexcept
{
    const float right = static_cast<float>(b);
    return static_cast<float>(a) + right;
}

// Difference a - b.
inline float16m7e8s1_t operator -(float16m7e8s1_t a, double b) noexcept
{
    const float right = static_cast<float>(b);
    return static_cast<float>(a) - right;
}

// Product of a and b.
inline float16m7e8s1_t operator *(float16m7e8s1_t a, double b) noexcept
{
    const float right = static_cast<float>(b);
    return static_cast<float>(a) * right;
}

// Quotient a / b.
inline float16m7e8s1_t operator /(float16m7e8s1_t a, double b) noexcept
{
    const float right = static_cast<float>(b);
    return static_cast<float>(a) / right;
}
// Binary arithmetic with a double on the left-hand side. The double is
// narrowed to float first, so the operation itself is performed in float (not
// double); the float result is converted to float16m7e8s1_t by the return.

// Sum of a and b.
inline float16m7e8s1_t operator +(double a, float16m7e8s1_t b) noexcept
{
    const float left = static_cast<float>(a);
    return left + static_cast<float>(b);
}

// Difference a - b.
inline float16m7e8s1_t operator -(double a, float16m7e8s1_t b) noexcept
{
    const float left = static_cast<float>(a);
    return left - static_cast<float>(b);
}

// Product of a and b.
inline float16m7e8s1_t operator *(double a, float16m7e8s1_t b) noexcept
{
    const float left = static_cast<float>(a);
    return left * static_cast<float>(b);
}

// Quotient a / b.
inline float16m7e8s1_t operator /(double a, float16m7e8s1_t b) noexcept
{
    const float left = static_cast<float>(a);
    return left / static_cast<float>(b);
}
// Compound assignment. The operation is computed in float from the current
// value of a and from b, the float result is assigned back into a, and a
// reference to a is returned.

// a = a + b.
inline float16m7e8s1_t& operator +=(float16m7e8s1_t& a, float16m7e8s1_t b) noexcept
{
    const float updated = static_cast<float>(a) + static_cast<float>(b);
    a = updated;
    return a;
}

// a = a - b.
inline float16m7e8s1_t& operator -=(float16m7e8s1_t& a, float16m7e8s1_t b) noexcept
{
    const float updated = static_cast<float>(a) - static_cast<float>(b);
    a = updated;
    return a;
}

// a = a * b.
inline float16m7e8s1_t& operator *=(float16m7e8s1_t& a, float16m7e8s1_t b) noexcept
{
    const float updated = static_cast<float>(a) * static_cast<float>(b);
    a = updated;
    return a;
}

// a = a / b.
inline float16m7e8s1_t& operator /=(float16m7e8s1_t& a, float16m7e8s1_t b) noexcept
{
    const float updated = static_cast<float>(a) / static_cast<float>(b);
    a = updated;
    return a;
}
// Pre-increment: adds one to a (computed in float), stores the result back
// into a, and returns a reference to a.
inline float16m7e8s1_t& operator ++(float16m7e8s1_t& a) noexcept
{
    const float incremented = static_cast<float>(a) + 1;
    a = incremented;
    return a;
}
// Pre-decrement: subtracts one from a (computed in float), stores the result
// back into a, and returns a reference to a.
// Fix: the previous implementation added 1 (copy-paste of operator ++), so
// --x increased x instead of decreasing it.
inline float16m7e8s1_t& operator --(float16m7e8s1_t& a) noexcept { return a = float(a) - 1; }
// Comparison operators. Both operands are converted to float and compared as
// floats, so the result follows float comparison semantics rather than a
// comparison of the raw 16-bit patterns.

// True when lhs equals rhs.
inline bool operator==(float16m7e8s1_t lhs, float16m7e8s1_t rhs) noexcept
{
    const float left = lhs;
    const float right = rhs;
    return left == right;
}

// True when lhs differs from rhs.
inline bool operator!=(float16m7e8s1_t lhs, float16m7e8s1_t rhs) noexcept
{
    const float left = lhs;
    const float right = rhs;
    return left != right;
}

// True when lhs is less than rhs.
inline bool operator< (float16m7e8s1_t lhs, float16m7e8s1_t rhs) noexcept
{
    const float left = lhs;
    const float right = rhs;
    return left < right;
}

// True when lhs is greater than rhs.
inline bool operator> (float16m7e8s1_t lhs, float16m7e8s1_t rhs) noexcept
{
    const float left = lhs;
    const float right = rhs;
    return left > right;
}

// True when lhs is less than or equal to rhs.
inline bool operator<=(float16m7e8s1_t lhs, float16m7e8s1_t rhs) noexcept
{
    const float left = lhs;
    const float right = rhs;
    return left <= right;
}

// True when lhs is greater than or equal to rhs.
inline bool operator>=(float16m7e8s1_t lhs, float16m7e8s1_t rhs) noexcept
{
    const float left = lhs;
    const float right = rhs;
    return left >= right;
}