24#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
25#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
27#if defined(__HIPCC_RTC__)
28 #define __HOST_DEVICE__ __device__
30 #define __HOST_DEVICE__ __host__ __device__
31 #include <hip/amd_detail/amd_hip_common.h>
33#if defined(__clang__) && defined(__HIP__)
34 #include "hip/amd_detail/amd_hip_atomic.h"
37 #if defined(__cplusplus)
39 #include <type_traits>
44#if defined(__clang__) && defined(__HIP__)
45 typedef _Float16 _Float16_2
__attribute__((ext_vector_type(2)));
49 static_assert(
sizeof(_Float16) ==
sizeof(
unsigned short),
"");
58 static_assert(
sizeof(_Float16_2) ==
sizeof(
unsigned short[2]),
"");
68 #if defined(__cplusplus)
69 #if !defined(__HIPCC_RTC__)
70 #include "hip_fp16_math_fwd.h"
71 #include "amd_hip_vector_types.h"
73 #include "amd_device_functions.h"
74 #include "amd_warp_functions.h"
78 template<>
struct is_floating_point<_Float16> : std::true_type {};
81 template<
bool cond,
typename T =
void>
82 using Enable_if_t =
typename std::enable_if<cond, T>::type;
88 static_assert(
sizeof(_Float16) ==
sizeof(
unsigned short),
"");
99 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
101 __half(
decltype(data) x) : data{x} {}
104 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
106 __half(T x) : data{static_cast<_Float16>(x)} {}
109 __half(
const __half&) =
default;
111 __half(__half&&) =
default;
116 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
118 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
120 __half(T x) : data{static_cast<_Float16>(x)} {}
125 __half& operator=(
const __half&) =
default;
127 __half& operator=(__half&&) =
default;
135 volatile __half& operator=(
const __half_raw& x)
volatile
140 volatile __half& operator=(
const volatile __half_raw& x)
volatile
150 volatile __half& operator=(
__half_raw&& x)
volatile
155 volatile __half& operator=(
volatile __half_raw&& x)
volatile
160 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
163 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
165 __half& operator=(T x)
167 data =
static_cast<_Float16
>(x);
173 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
175 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
177 __half& operator=(T x)
179 data =
static_cast<_Float16
>(x);
184 #if !defined(__HIP_NO_HALF_OPERATORS__)
210 __half&
operator++() { ++data;
return *
this; }
219 __half&
operator--() { --data;
return *
this; }
230 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
233 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
235 operator T()
const {
return data; }
245 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
247 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
249 operator T()
const {
return data; }
252 #if !defined(__HIP_NO_HALF_OPERATORS__)
254 __half
operator+()
const {
return *
this; }
259 tmp.data = -tmp.data;
265 #if !defined(__HIP_NO_HALF_OPERATORS__)
269 __half
operator+(
const __half& x,
const __half& y)
271 return __half{x} += y;
276 __half
operator-(
const __half& x,
const __half& y)
278 return __half{x} -= y;
283 __half
operator*(
const __half& x,
const __half& y)
285 return __half{x} *= y;
290 __half
operator/(
const __half& x,
const __half& y)
292 return __half{x} /= y;
297 bool operator==(
const __half& x,
const __half& y)
299 return x.data == y.data;
304 bool operator!=(
const __half& x,
const __half& y)
311 bool operator<(
const __half& x,
const __half& y)
313 return x.data < y.data;
318 bool operator>(
const __half& x,
const __half& y)
320 return y.data < x.data;
325 bool operator<=(
const __half& x,
const __half& y)
332 bool operator>=(
const __half& x,
const __half& y)
345 sizeof(_Float16_2) ==
sizeof(
unsigned short[2]),
"");
360 __half2(
decltype(data) xx) : data{xx} {}
362 __half2(
const __half& xx,
const __half& yy)
368 __half2(
const __half2&) =
default;
370 __half2(__half2&&) =
default;
372 ~__half2() =
default;
376 __half2& operator=(
const __half2&) =
default;
378 __half2& operator=(__half2&&) =
default;
387 #if !defined(__HIP_NO_HALF_OPERATORS__)
413 __half2&
operator++() {
return *
this += _Float16_2{1, 1}; }
422 __half2&
operator--() {
return *
this -= _Float16_2{1, 1}; }
434 operator decltype(data)()
const {
return data; }
443 #if !defined(__HIP_NO_HALF_OPERATORS__)
445 __half2
operator+()
const {
return *
this; }
450 tmp.data = -tmp.data;
456 #if !defined(__HIP_NO_HALF_OPERATORS__)
460 __half2
operator+(
const __half2& xx,
const __half2& yy)
462 return __half2{xx} += yy;
467 __half2
operator-(
const __half2& xx,
const __half2& yy)
469 return __half2{xx} -= yy;
474 __half2
operator*(
const __half2& xx,
const __half2& yy)
476 return __half2{xx} *= yy;
481 __half2
operator/(
const __half2& xx,
const __half2& yy)
483 return __half2{xx} /= yy;
488 bool operator==(
const __half2& xx,
const __half2& yy)
490 auto r = xx.data == yy.data;
491 return r.x != 0 && r.y != 0;
496 bool operator!=(
const __half2& xx,
const __half2& yy)
503 bool operator<(
const __half2& xx,
const __half2& yy)
505 auto r = xx.data < yy.data;
506 return r.x != 0 && r.y != 0;
511 bool operator>(
const __half2& xx,
const __half2& yy)
518 bool operator<=(
const __half2& xx,
const __half2& yy)
525 bool operator>=(
const __half2& xx,
const __half2& yy)
537 __half2 make_half2(__half x, __half y)
539 return __half2{x, y};
544 __half __low2half(__half2 x)
551 __half __high2half(__half2 x)
558 __half2 __half2half2(__half x)
560 return __half2{x, x};
565 __half2 __halves2half2(__half x, __half y)
567 return __half2{x, y};
572 __half2 __low2half2(__half2 x)
582 __half2 __high2half2(__half2 x)
592 __half2 __lows2half2(__half2 x, __half2 y)
602 __half2 __highs2half2(__half2 x, __half2 y)
623 short __half_as_short(__half x)
630 unsigned short __half_as_ushort(__half x)
637 __half __short_as_half(
short x)
645 __half __ushort_as_half(
unsigned short x)
654 __half __float2half(
float x)
660 __half __float2half_rn(
float x)
664 #if !defined(__HIPCC_RTC__)
668 __half __float2half_rz(
float x)
674 __half __float2half_rd(
float x)
680 __half __float2half_ru(
float x)
687 __half __float2half_rz(
float x)
693 __half __float2half_rd(
float x)
699 __half __float2half_ru(
float x)
705 __half2 __float2half2_rn(
float x)
709 static_cast<_Float16
>(x),
static_cast<_Float16
>(x)}};
713 __half2 __floats2half2_rn(
float x,
float y)
715 return __half2{_Float16_2{
716 static_cast<_Float16
>(x),
static_cast<_Float16
>(y)}};
720 __half2 __float22half2_rn(
float2 x)
722 return __floats2half2_rn(x.x, x.y);
728 float __half2float(__half x)
746 float2 __half22float2(__half2 x)
756 int __half2int_rn(__half x)
762 int __half2int_rz(__half x)
768 int __half2int_rd(__half x)
774 int __half2int_ru(__half x)
782 __half __int2half_rn(
int x)
788 __half __int2half_rz(
int x)
794 __half __int2half_rd(
int x)
800 __half __int2half_ru(
int x)
808 short __half2short_rn(__half x)
814 short __half2short_rz(__half x)
820 short __half2short_rd(__half x)
826 short __half2short_ru(__half x)
834 __half __short2half_rn(
short x)
840 __half __short2half_rz(
short x)
846 __half __short2half_rd(
short x)
852 __half __short2half_ru(
short x)
860 long long __half2ll_rn(__half x)
866 long long __half2ll_rz(__half x)
872 long long __half2ll_rd(__half x)
878 long long __half2ll_ru(__half x)
886 __half __ll2half_rn(
long long x)
892 __half __ll2half_rz(
long long x)
898 __half __ll2half_rd(
long long x)
904 __half __ll2half_ru(
long long x)
912 unsigned int __half2uint_rn(__half x)
918 unsigned int __half2uint_rz(__half x)
924 unsigned int __half2uint_rd(__half x)
930 unsigned int __half2uint_ru(__half x)
938 __half __uint2half_rn(
unsigned int x)
944 __half __uint2half_rz(
unsigned int x)
950 __half __uint2half_rd(
unsigned int x)
956 __half __uint2half_ru(
unsigned int x)
964 unsigned short __half2ushort_rn(__half x)
970 unsigned short __half2ushort_rz(__half x)
976 unsigned short __half2ushort_rd(__half x)
982 unsigned short __half2ushort_ru(__half x)
990 __half __ushort2half_rn(
unsigned short x)
996 __half __ushort2half_rz(
unsigned short x)
1002 __half __ushort2half_rd(
unsigned short x)
1008 __half __ushort2half_ru(
unsigned short x)
1016 unsigned long long __half2ull_rn(__half x)
1022 unsigned long long __half2ull_rz(__half x)
1028 unsigned long long __half2ull_rd(__half x)
1034 unsigned long long __half2ull_ru(__half x)
1042 __half __ull2half_rn(
unsigned long long x)
1048 __half __ull2half_rz(
unsigned long long x)
1054 __half __ull2half_rd(
unsigned long long x)
1060 __half __ull2half_ru(
unsigned long long x)
1068 __half __ldg(
const __half* ptr) {
return *ptr; }
1071 __half __ldcg(
const __half* ptr) {
return *ptr; }
1074 __half __ldca(
const __half* ptr) {
return *ptr; }
1077 __half __ldcs(
const __half* ptr) {
return *ptr; }
1081 __half2 __ldg(
const __half2* ptr) {
return *ptr; }
1084 __half2 __ldcg(
const __half2* ptr) {
return *ptr; }
1087 __half2 __ldca(
const __half2* ptr) {
return *ptr; }
1090 __half2 __ldcs(
const __half2* ptr) {
return *ptr; }
1095 bool __heq(__half x, __half y)
1102 bool __hne(__half x, __half y)
1109 bool __hle(__half x, __half y)
1116 bool __hge(__half x, __half y)
1123 bool __hlt(__half x, __half y)
1130 bool __hgt(__half x, __half y)
1136 bool __hequ(__half x, __half y) {
1141 bool __hneu(__half x, __half y) {
1145 bool __hleu(__half x, __half y) {
1150 bool __hgeu(__half x, __half y) {
1155 bool __hltu(__half x, __half y) {
1160 bool __hgtu(__half x, __half y) {
1166 __half2
__heq2(__half2 x, __half2 y)
1170 return __builtin_convertvector(-r, _Float16_2);
1174 __half2
__hne2(__half2 x, __half2 y)
1178 return __builtin_convertvector(-r, _Float16_2);
1182 __half2
__hle2(__half2 x, __half2 y)
1186 return __builtin_convertvector(-r, _Float16_2);
1190 __half2
__hge2(__half2 x, __half2 y)
1194 return __builtin_convertvector(-r, _Float16_2);
1198 __half2
__hlt2(__half2 x, __half2 y)
1202 return __builtin_convertvector(-r, _Float16_2);
1206 __half2
__hgt2(__half2 x, __half2 y)
1210 return __builtin_convertvector(-r, _Float16_2);
1212 inline __HOST_DEVICE__
1213 __half2 __hequ2(__half2 x, __half2 y) {
1216 return __builtin_convertvector(-r, _Float16_2);
1220 __half2 __hneu2(__half2 x, __half2 y) {
1222 return __builtin_convertvector(-r, _Float16_2);
1226 __half2 __hleu2(__half2 x, __half2 y) {
1228 return __builtin_convertvector(-r, _Float16_2);
1232 __half2 __hgeu2(__half2 x, __half2 y) {
1234 return __builtin_convertvector(-r, _Float16_2);
1238 __half2 __hltu2(__half2 x, __half2 y) {
1240 return __builtin_convertvector(-r, _Float16_2);
1244 __half2 __hgtu2(__half2 x, __half2 y) {
1246 return __builtin_convertvector(-r, _Float16_2);
1251 bool __hbeq2(__half2 x, __half2 y)
1254 return r.data.x != 0 && r.data.y != 0;
1258 bool __hbne2(__half2 x, __half2 y)
1261 return r.data.x != 0 && r.data.y != 0;
1265 bool __hble2(__half2 x, __half2 y)
1268 return r.data.x != 0 && r.data.y != 0;
1272 bool __hbge2(__half2 x, __half2 y)
1275 return r.data.x != 0 && r.data.y != 0;
1279 bool __hblt2(__half2 x, __half2 y)
1282 return r.data.x != 0 && r.data.y != 0;
1286 bool __hbgt2(__half2 x, __half2 y)
1289 return r.data.x != 0 && r.data.y != 0;
1311 __half
__hmax(
const __half x,
const __half y) {
1317 __half __hmax_nan(
const __half x,
const __half y) {
1318 if(__ocml_isnan_f16(
static_cast<__half_raw>(x).data)) {
1320 }
else if (__ocml_isnan_f16(
static_cast<__half_raw>(y).data)) {
1327 __half
__hmin(
const __half x,
const __half y) {
1333 __half __hmin_nan(
const __half x,
const __half y) {
1334 if(__ocml_isnan_f16(
static_cast<__half_raw>(x).data)) {
1336 }
else if (__ocml_isnan_f16(
static_cast<__half_raw>(y).data)) {
1345 __half __clamp_01(__half x)
1356 __half
__hadd(__half x, __half y)
1367 __ocml_fabs_f16(
static_cast<__half_raw>(x).data)};
1371 __half
__hsub(__half x, __half y)
1379 __half
__hmul(__half x, __half y)
1387 __half __hadd_sat(__half x, __half y)
1389 return __clamp_01(
__hadd(x, y));
1393 __half __hsub_sat(__half x, __half y)
1395 return __clamp_01(
__hsub(x, y));
1399 __half __hmul_sat(__half x, __half y)
1401 return __clamp_01(
__hmul(x, y));
1405 __half
__hfma(__half x, __half y, __half z)
1414 __half __hfma_sat(__half x, __half y, __half z)
1416 return __clamp_01(
__hfma(x, y, z));
1420 __half
__hdiv(__half x, __half y)
1429 __half2
__hadd2(__half2 x, __half2 y)
1440 __ocml_fabs_2f16(
static_cast<__half2_raw>(x).data)};
1444 __half2
__hsub2(__half2 x, __half2 y)
1452 __half2
__hmul2(__half2 x, __half2 y)
1460 __half2 __hadd2_sat(__half2 x, __half2 y)
1469 __half2 __hsub2_sat(__half2 x, __half2 y)
1478 __half2 __hmul2_sat(__half2 x, __half2 y)
1487 __half2
__hfma2(__half2 x, __half2 y, __half2 z)
1489 return __half2{__ocml_fma_2f16(x, y, z)};
1493 __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
1502 __half2
__h2div(__half2 x, __half2 y)
1510 #if defined(__clang__) && defined(__HIP__)
1511 inline __device__ __half2 unsafeAtomicAdd(__half2* address, __half2 value) {
1512 #if __has_builtin(__builtin_amdgcn_flat_atomic_fadd_v2f16)
1514 typedef _Float16
__attribute__((ext_vector_type(2))) vec_fp162;
1515 static_assert(sizeof(vec_fp162) == sizeof(
__half2_raw));
1521 __builtin_amdgcn_flat_atomic_fadd_v2f16((vec_fp162*)address, u.fp16);
1522 return static_cast<__half2
>(u.h2r);
1524 static_assert(
sizeof(
__half2_raw) ==
sizeof(
unsigned int));
1529 u_hold old_val, new_val;
1530 old_val.u32 = __hip_atomic_load((
unsigned int*)address, __ATOMIC_RELAXED,
1531 __HIP_MEMORY_SCOPE_AGENT);
1533 new_val.h2r =
__hadd2(old_val.h2r, value);
1534 }
while (!__hip_atomic_compare_exchange_strong(
1535 (
unsigned int*)address, &old_val.u32, new_val.u32, __ATOMIC_RELAXED,
1536 __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT));
1543 #if defined(__clang__) && defined(__HIP__)
1546 float amd_mixed_dot(__half2 a, __half2 b,
float c,
bool saturate) {
1547 return __ockl_fdot2(
static_cast<__half2_raw>(a).data,
1557 __ocml_trunc_f16(
static_cast<__half_raw>(x).data)};
1561 __half
hceil(__half x)
1564 __ocml_ceil_f16(
static_cast<__half_raw>(x).data)};
1571 __ocml_floor_f16(
static_cast<__half_raw>(x).data)};
1575 __half
hrint(__half x)
1578 __ocml_rint_f16(
static_cast<__half_raw>(x).data)};
1582 __half
hsin(__half x)
1585 __ocml_sin_f16(
static_cast<__half_raw>(x).data)};
1589 __half
hcos(__half x)
1592 __ocml_cos_f16(
static_cast<__half_raw>(x).data)};
1596 __half
hexp(__half x)
1599 __ocml_exp_f16(
static_cast<__half_raw>(x).data)};
1603 __half
hexp2(__half x)
1606 __ocml_exp2_f16(
static_cast<__half_raw>(x).data)};
1613 __ocml_exp10_f16(
static_cast<__half_raw>(x).data)};
1617 __half
hlog2(__half x)
1620 __ocml_log2_f16(
static_cast<__half_raw>(x).data)};
1624 __half
hlog(__half x)
1627 __ocml_log_f16(
static_cast<__half_raw>(x).data)};
1634 __ocml_log10_f16(
static_cast<__half_raw>(x).data)};
1638 __half
hrcp(__half x)
1641 static_cast<_Float16
>(1.0f) /
static_cast<__half_raw>(x).data};
1648 __ocml_rsqrt_f16(
static_cast<__half_raw>(x).data)};
1652 __half
hsqrt(__half x)
1655 __ocml_sqrt_f16(
static_cast<__half_raw>(x).data)};
1661 return __ocml_isinf_f16(
static_cast<__half_raw>(x).data);
1667 return __ocml_isnan_f16(
static_cast<__half_raw>(x).data);
1680 return __half2{__ocml_trunc_2f16(x)};
1684 __half2
h2ceil(__half2 x)
1686 return __half2{__ocml_ceil_2f16(x)};
1692 return __half2{__ocml_floor_2f16(x)};
1696 __half2
h2rint(__half2 x)
1698 return __half2{__ocml_rint_2f16(x)};
1702 __half2
h2sin(__half2 x)
1704 return __half2{__ocml_sin_2f16(x)};
1708 __half2
h2cos(__half2 x)
1710 return __half2{__ocml_cos_2f16(x)};
1714 __half2
h2exp(__half2 x)
1716 return __half2{__ocml_exp_2f16(x)};
1720 __half2
h2exp2(__half2 x)
1722 return __half2{__ocml_exp2_2f16(x)};
1728 return __half2{__ocml_exp10_2f16(x)};
1732 __half2
h2log2(__half2 x)
1734 return __half2{__ocml_log2_2f16(x)};
1738 __half2
h2log(__half2 x) {
return __ocml_log_2f16(x); }
1741 __half2
h2log10(__half2 x) {
return __ocml_log10_2f16(x); }
1744 __half2
h2rcp(__half2 x) {
1746 _Float16_2{
static_cast<_Float16
>(1.0f),
static_cast<_Float16
>(1.0f)} / x.data};
1750 __half2
h2rsqrt(__half2 x) {
return __ocml_rsqrt_2f16(x); }
1753 __half2
h2sqrt(__half2 x) {
return __ocml_sqrt_2f16(x); }
1756 __half2 __hisinf2(__half2 x)
1758 auto r = __ocml_isinf_2f16(x);
1759 return __half2{_Float16_2{
1760 static_cast<_Float16
>(r.x),
static_cast<_Float16
>(r.y)}};
1766 auto r = __ocml_isnan_2f16(x);
1767 return __half2{_Float16_2{
1768 static_cast<_Float16
>(r.x),
static_cast<_Float16
>(r.y)}};
1774 return __half2{-
static_cast<__half2_raw>(x).data};
1778 #if !defined(HIP_NO_HALF)
1779 using half = __half;
1780 using half2 = __half2;
1784 __half __shfl(__half var,
int src_lane,
int width = warpSize) {
1785 union {
int i; __half h; } tmp; tmp.h = var;
1786 tmp.i = __shfl(tmp.i, src_lane, width);
1791 __half2 __shfl(__half2 var,
int src_lane,
int width = warpSize) {
1792 union {
int i; __half2 h; } tmp; tmp.h = var;
1793 tmp.i = __shfl(tmp.i, src_lane, width);
1798 __half __shfl_up(__half var,
unsigned int lane_delta,
int width = warpSize) {
1799 union {
int i; __half h; } tmp; tmp.h = var;
1800 tmp.i = __shfl_up(tmp.i, lane_delta, width);
1805 __half2 __shfl_up(__half2 var,
unsigned int lane_delta,
int width = warpSize) {
1806 union {
int i; __half2 h; } tmp; tmp.h = var;
1807 tmp.i = __shfl_up(tmp.i, lane_delta, width);
1812 __half __shfl_down(__half var,
unsigned int lane_delta,
int width = warpSize) {
1813 union {
int i; __half h; } tmp; tmp.h = var;
1814 tmp.i = __shfl_down(tmp.i, lane_delta, width);
1819 __half2 __shfl_down(__half2 var,
unsigned int lane_delta,
int width = warpSize) {
1820 union {
int i; __half2 h; } tmp; tmp.h = var;
1821 tmp.i = __shfl_down(tmp.i, lane_delta, width);
1826 __half __shfl_xor(__half var,
int lane_mask,
int width = warpSize) {
1827 union {
int i; __half h; } tmp; tmp.h = var;
1828 tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1833 __half2 __shfl_xor(__half2 var,
int lane_mask,
int width = warpSize) {
1834 union {
int i; __half2 h; } tmp; tmp.h = var;
1835 tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1839#elif defined(__GNUC__) || defined(_MSC_VER)
1840 #if !defined(__HIPCC_RTC__)
1841 #include "hip_fp16_gcc.h"
#define __host__
Definition host_defines.h:170
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:805
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator-=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to subtract-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1016
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16 &l)
Operator to unary+ on a __hip_bfloat16 number.
Definition amd_hip_bf16.h:939
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator/(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1025
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16 &l)
Operator to negate a __hip_bfloat16 number.
Definition amd_hip_bf16.h:954
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:839
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:797
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator/=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1034
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator*(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:921
__BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:821
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator*=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:930
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:831
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator++(__hip_bfloat16 &l, const int)
Operator to post increment a __hip_bfloat16 number.
Definition amd_hip_bf16.h:969
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator+=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to add-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1007
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition amd_hip_bf16.h:849
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator--(__hip_bfloat16 &l, const int)
Operator to post decrement a __hip_bfloat16 number.
Definition amd_hip_bf16.h:988
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:813
__BF16_HOST_DEVICE_STATIC__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values.
Definition amd_hip_bf16.h:1171
__BF16_HOST_DEVICE_STATIC__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than.
Definition amd_hip_bf16.h:1188
__BF16_HOST_DEVICE_STATIC__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - not equal.
Definition amd_hip_bf16.h:1220
__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform an equal compare on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1493
__BF16_HOST_DEVICE_STATIC__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than equal.
Definition amd_hip_bf16.h:1276
__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a not equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1501
__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1525
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return min.
Definition amd_hip_bf16.h:1248
__BF16_HOST_DEVICE_STATIC__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than equal.
Definition amd_hip_bf16.h:1204
__BF16_HOST_DEVICE_STATIC__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than equal.
Definition amd_hip_bf16.h:1284
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return max.
Definition amd_hip_bf16.h:1236
__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1517
__BF16_HOST_DEVICE_STATIC__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:1292
__BF16_HOST_DEVICE_STATIC__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered not equal.
Definition amd_hip_bf16.h:1228
__BF16_HOST_DEVICE_STATIC__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than.
Definition amd_hip_bf16.h:1196
__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1509
__BF16_HOST_DEVICE_STATIC__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than.
Definition amd_hip_bf16.h:1268
__BF16_HOST_DEVICE_STATIC__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than equal.
Definition amd_hip_bf16.h:1212
__BF16_HOST_DEVICE_STATIC__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than operator.
Definition amd_hip_bf16.h:1260
__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1533
__BF16_HOST_DEVICE_STATIC__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:1301
__BF16_HOST_DEVICE_STATIC__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered equal.
Definition amd_hip_bf16.h:1179
__BF16_HOST_DEVICE_STATIC__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:1334
__BF16_HOST_DEVICE_STATIC__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:1342
__BF16_HOST_DEVICE_STATIC__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:1358
__BF16_HOST_DEVICE_STATIC__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:1374
__BF16_HOST_DEVICE_STATIC__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:1366
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:1483
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1416
__BF16_HOST_DEVICE_STATIC__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1390
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1455
__BF16_HOST_DEVICE_STATIC__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:1382
__BF16_HOST_DEVICE_STATIC__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:1318
__BF16_HOST_DEVICE_STATIC__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:1310
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1445
__BF16_HOST_DEVICE_STATIC__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1398
__BF16_HOST_DEVICE_STATIC__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:1350
__BF16_HOST_DEVICE_STATIC__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:1326
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:1406
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1426
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:1436
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition amd_hip_bf16.h:869
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:912
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:895
__BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:886
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:877
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Converts a bfloat162 into negative.
Definition amd_hip_bf16.h:904
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:859
__BF16_HOST_DEVICE_STATIC__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:636
__BF16_HOST_DEVICE_STATIC__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:605
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:644
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:1618
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:1642
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:1626
__BF16_DEVICE_STATIC__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:1594
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:1666
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:1698
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:1650
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:1690
__BF16_DEVICE_STATIC__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:1634
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:1674
__BF16_DEVICE_STATIC__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:1706
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:1682
__BF16_DEVICE_STATIC__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:1602
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:1658
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:1610
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:1826
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:1786
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:1762
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:1730
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:1722
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:1810
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:1778
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:1714
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:1754
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:1738
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:1746
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:1770
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:1802
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:1794
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:1818
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Definition hip_fp16_math_fwd.h:57
Definition amd_hip_vector_types.h:2035
Definition hip_fp16_gcc.h:7
Definition hip_fp16_gcc.h:11