HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_fp16.h
1/*
2Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
23#pragma once
24#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
25#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
26
27#if defined(__HIPCC_RTC__)
28 #define __HOST_DEVICE__ __device__
29#else
30 #define __HOST_DEVICE__ __host__ __device__
31 #include <hip/amd_detail/amd_hip_common.h>
33#if defined(__clang__) && defined(__HIP__)
34 #include "hip/amd_detail/amd_hip_atomic.h"
35#endif // defined(__clang__) && defined(__HIP__)
36 #include <assert.h>
37 #if defined(__cplusplus)
38 #include <algorithm>
39 #include <type_traits>
40 #include <utility>
41#endif
42#endif // !defined(__HIPCC_RTC__)
43
44#if defined(__clang__) && defined(__HIP__)
45 typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
46
47 struct __half_raw {
48 union {
49 static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
50
51 _Float16 data;
52 unsigned short x;
53 };
54 };
55
56 struct __half2_raw {
57 union {
58 static_assert(sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
59
60 struct {
61 __half_raw x;
62 __half_raw y;
63 };
64 _Float16_2 data;
65 };
66 };
67
68 #if defined(__cplusplus)
69 #if !defined(__HIPCC_RTC__)
70 #include "hip_fp16_math_fwd.h"
71 #include "amd_hip_vector_types.h"
72 #include "host_defines.h"
73 #include "amd_device_functions.h"
74 #include "amd_warp_functions.h"
75 #endif
76 namespace std
77 {
78 template<> struct is_floating_point<_Float16> : std::true_type {};
79 }
80
81 template<bool cond, typename T = void>
82 using Enable_if_t = typename std::enable_if<cond, T>::type;
83
84 // BEGIN STRUCT __HALF
85 struct __half {
86 protected:
87 union {
88 static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
89
90 _Float16 data;
91 unsigned short __x;
92 };
93 public:
94 // CREATORS
95 __HOST_DEVICE__
96 __half() = default;
97 __HOST_DEVICE__
98 __half(const __half_raw& x) : data{x.data} {}
99 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
100 __HOST_DEVICE__
101 __half(decltype(data) x) : data{x} {}
102 template<
103 typename T,
104 Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
105 __HOST_DEVICE__
106 __half(T x) : data{static_cast<_Float16>(x)} {}
107 #endif
108 __HOST_DEVICE__
109 __half(const __half&) = default;
110 __HOST_DEVICE__
111 __half(__half&&) = default;
112 __HOST_DEVICE__
113 ~__half() = default;
114
115 // CREATORS - DEVICE ONLY
116 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
117 template<
118 typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
119 __HOST_DEVICE__
120 __half(T x) : data{static_cast<_Float16>(x)} {}
121 #endif
122
123 // MANIPULATORS
124 __HOST_DEVICE__
125 __half& operator=(const __half&) = default;
126 __HOST_DEVICE__
127 __half& operator=(__half&&) = default;
128 __HOST_DEVICE__
129 __half& operator=(const __half_raw& x)
130 {
131 data = x.data;
132 return *this;
133 }
134 __HOST_DEVICE__
135 volatile __half& operator=(const __half_raw& x) volatile
136 {
137 data = x.data;
138 return *this;
139 }
140 volatile __half& operator=(const volatile __half_raw& x) volatile
141 {
142 data = x.data;
143 return *this;
144 }
145 __half& operator=(__half_raw&& x)
146 {
147 data = x.data;
148 return *this;
149 }
150 volatile __half& operator=(__half_raw&& x) volatile
151 {
152 data = x.data;
153 return *this;
154 }
155 volatile __half& operator=(volatile __half_raw&& x) volatile
156 {
157 data = x.data;
158 return *this;
159 }
160 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
161 template<
162 typename T,
163 Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
164 __HOST_DEVICE__
165 __half& operator=(T x)
166 {
167 data = static_cast<_Float16>(x);
168 return *this;
169 }
170 #endif
171
172 // MANIPULATORS - DEVICE ONLY
173 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
174 template<
175 typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
176 __device__
177 __half& operator=(T x)
178 {
179 data = static_cast<_Float16>(x);
180 return *this;
181 }
182 #endif
183
184 #if !defined(__HIP_NO_HALF_OPERATORS__)
185 __device__
186 __half& operator+=(const __half& x)
187 {
188 data += x.data;
189 return *this;
190 }
191 __device__
192 __half& operator-=(const __half& x)
193 {
194 data -= x.data;
195 return *this;
196 }
197 __device__
198 __half& operator*=(const __half& x)
199 {
200 data *= x.data;
201 return *this;
202 }
203 __device__
204 __half& operator/=(const __half& x)
205 {
206 data /= x.data;
207 return *this;
208 }
209 __device__
210 __half& operator++() { ++data; return *this; }
211 __device__
212 __half operator++(int)
213 {
214 __half tmp{*this};
215 ++*this;
216 return tmp;
217 }
218 __device__
219 __half& operator--() { --data; return *this; }
220 __device__
221 __half operator--(int)
222 {
223 __half tmp{*this};
224 --*this;
225 return tmp;
226 }
227 #endif
228
229 // ACCESSORS
230 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
231 template<
232 typename T,
233 Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
234 __HOST_DEVICE__
235 operator T() const { return data; }
236 #endif
237 __HOST_DEVICE__
238 operator __half_raw() const { return __half_raw{data}; }
239 __HOST_DEVICE__
240 operator __half_raw() const volatile
241 {
242 return __half_raw{data};
243 }
244
245 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
246 template<
247 typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
248 __HOST_DEVICE__
249 operator T() const { return data; }
250 #endif
251
252 #if !defined(__HIP_NO_HALF_OPERATORS__)
253 __device__
254 __half operator+() const { return *this; }
255 __device__
256 __half operator-() const
257 {
258 __half tmp{*this};
259 tmp.data = -tmp.data;
260 return tmp;
261 }
262 #endif
263
264 // FRIENDS
265 #if !defined(__HIP_NO_HALF_OPERATORS__)
266 friend
267 inline
268 __device__
269 __half operator+(const __half& x, const __half& y)
270 {
271 return __half{x} += y;
272 }
273 friend
274 inline
275 __device__
276 __half operator-(const __half& x, const __half& y)
277 {
278 return __half{x} -= y;
279 }
280 friend
281 inline
282 __device__
283 __half operator*(const __half& x, const __half& y)
284 {
285 return __half{x} *= y;
286 }
287 friend
288 inline
289 __device__
290 __half operator/(const __half& x, const __half& y)
291 {
292 return __half{x} /= y;
293 }
294 friend
295 inline
296 __device__
297 bool operator==(const __half& x, const __half& y)
298 {
299 return x.data == y.data;
300 }
301 friend
302 inline
303 __device__
304 bool operator!=(const __half& x, const __half& y)
305 {
306 return !(x == y);
307 }
308 friend
309 inline
310 __device__
311 bool operator<(const __half& x, const __half& y)
312 {
313 return x.data < y.data;
314 }
315 friend
316 inline
317 __device__
318 bool operator>(const __half& x, const __half& y)
319 {
320 return y.data < x.data;
321 }
322 friend
323 inline
324 __device__
325 bool operator<=(const __half& x, const __half& y)
326 {
327 return !(y < x);
328 }
329 friend
330 inline
331 __device__
332 bool operator>=(const __half& x, const __half& y)
333 {
334 return !(x < y);
335 }
336 #endif // !defined(__HIP_NO_HALF_OPERATORS__)
337 };
338 // END STRUCT __HALF
339
340 // BEGIN STRUCT __HALF2
341 struct __half2 {
342 public:
343 union {
344 static_assert(
345 sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
346
347 struct {
348 __half x;
349 __half y;
350 };
351 _Float16_2 data;
352 };
353
354 // CREATORS
355 __HOST_DEVICE__
356 __half2() = default;
357 __HOST_DEVICE__
358 __half2(const __half2_raw& xx) : data{xx.data} {}
359 __HOST_DEVICE__
360 __half2(decltype(data) xx) : data{xx} {}
361 __HOST_DEVICE__
362 __half2(const __half& xx, const __half& yy)
363 :
364 data{static_cast<__half_raw>(xx).data,
365 static_cast<__half_raw>(yy).data}
366 {}
367 __HOST_DEVICE__
368 __half2(const __half2&) = default;
369 __HOST_DEVICE__
370 __half2(__half2&&) = default;
371 __HOST_DEVICE__
372 ~__half2() = default;
373
374 // MANIPULATORS
375 __HOST_DEVICE__
376 __half2& operator=(const __half2&) = default;
377 __HOST_DEVICE__
378 __half2& operator=(__half2&&) = default;
379 __HOST_DEVICE__
380 __half2& operator=(const __half2_raw& xx)
381 {
382 data = xx.data;
383 return *this;
384 }
385
386 // MANIPULATORS - DEVICE ONLY
387 #if !defined(__HIP_NO_HALF_OPERATORS__)
388 __device__
389 __half2& operator+=(const __half2& xx)
390 {
391 data += xx.data;
392 return *this;
393 }
394 __device__
395 __half2& operator-=(const __half2& xx)
396 {
397 data -= xx.data;
398 return *this;
399 }
400 __device__
401 __half2& operator*=(const __half2& xx)
402 {
403 data *= xx.data;
404 return *this;
405 }
406 __device__
407 __half2& operator/=(const __half2& xx)
408 {
409 data /= xx.data;
410 return *this;
411 }
412 __device__
413 __half2& operator++() { return *this += _Float16_2{1, 1}; }
414 __device__
415 __half2 operator++(int)
416 {
417 __half2 tmp{*this};
418 ++*this;
419 return tmp;
420 }
421 __device__
422 __half2& operator--() { return *this -= _Float16_2{1, 1}; }
423 __device__
424 __half2 operator--(int)
425 {
426 __half2 tmp{*this};
427 --*this;
428 return tmp;
429 }
430 #endif
431
432 // ACCESSORS
433 __HOST_DEVICE__
434 operator decltype(data)() const { return data; }
435 __HOST_DEVICE__
436 operator __half2_raw() const {
437 __half2_raw r;
438 r.data = data;
439 return r;
440 }
441
442 // ACCESSORS - DEVICE ONLY
443 #if !defined(__HIP_NO_HALF_OPERATORS__)
444 __device__
445 __half2 operator+() const { return *this; }
446 __device__
447 __half2 operator-() const
448 {
449 __half2 tmp{*this};
450 tmp.data = -tmp.data;
451 return tmp;
452 }
453 #endif
454
455 // FRIENDS
456 #if !defined(__HIP_NO_HALF_OPERATORS__)
457 friend
458 inline
459 __device__
460 __half2 operator+(const __half2& xx, const __half2& yy)
461 {
462 return __half2{xx} += yy;
463 }
464 friend
465 inline
466 __device__
467 __half2 operator-(const __half2& xx, const __half2& yy)
468 {
469 return __half2{xx} -= yy;
470 }
471 friend
472 inline
473 __device__
474 __half2 operator*(const __half2& xx, const __half2& yy)
475 {
476 return __half2{xx} *= yy;
477 }
478 friend
479 inline
480 __device__
481 __half2 operator/(const __half2& xx, const __half2& yy)
482 {
483 return __half2{xx} /= yy;
484 }
485 friend
486 inline
487 __device__
488 bool operator==(const __half2& xx, const __half2& yy)
489 {
490 auto r = xx.data == yy.data;
491 return r.x != 0 && r.y != 0;
492 }
493 friend
494 inline
495 __device__
496 bool operator!=(const __half2& xx, const __half2& yy)
497 {
498 return !(xx == yy);
499 }
500 friend
501 inline
502 __device__
503 bool operator<(const __half2& xx, const __half2& yy)
504 {
505 auto r = xx.data < yy.data;
506 return r.x != 0 && r.y != 0;
507 }
508 friend
509 inline
510 __device__
511 bool operator>(const __half2& xx, const __half2& yy)
512 {
513 return yy < xx;
514 }
515 friend
516 inline
517 __device__
518 bool operator<=(const __half2& xx, const __half2& yy)
519 {
520 return !(yy < xx);
521 }
522 friend
523 inline
524 __device__
525 bool operator>=(const __half2& xx, const __half2& yy)
526 {
527 return !(xx < yy);
528 }
529 #endif // !defined(__HIP_NO_HALF_OPERATORS__)
530 };
531 // END STRUCT __HALF2
532
533 namespace
534 {
535 inline
536 __HOST_DEVICE__
537 __half2 make_half2(__half x, __half y)
538 {
539 return __half2{x, y};
540 }
541
542 inline
543 __HOST_DEVICE__
544 __half __low2half(__half2 x)
545 {
546 return __half{__half_raw{static_cast<__half2_raw>(x).data.x}};
547 }
548
549 inline
550 __HOST_DEVICE__
551 __half __high2half(__half2 x)
552 {
553 return __half{__half_raw{static_cast<__half2_raw>(x).data.y}};
554 }
555
556 inline
557 __HOST_DEVICE__
558 __half2 __half2half2(__half x)
559 {
560 return __half2{x, x};
561 }
562
563 inline
564 __HOST_DEVICE__
565 __half2 __halves2half2(__half x, __half y)
566 {
567 return __half2{x, y};
568 }
569
570 inline
571 __HOST_DEVICE__
572 __half2 __low2half2(__half2 x)
573 {
574 return __half2{
575 _Float16_2{
576 static_cast<__half2_raw>(x).data.x,
577 static_cast<__half2_raw>(x).data.x}};
578 }
579
580 inline
581 __HOST_DEVICE__
582 __half2 __high2half2(__half2 x)
583 {
584 return __half2{
585 _Float16_2{
586 static_cast<__half2_raw>(x).data.y,
587 static_cast<__half2_raw>(x).data.y}};
588 }
589
590 inline
591 __HOST_DEVICE__
592 __half2 __lows2half2(__half2 x, __half2 y)
593 {
594 return __half2{
595 _Float16_2{
596 static_cast<__half2_raw>(x).data.x,
597 static_cast<__half2_raw>(y).data.x}};
598 }
599
600 inline
601 __HOST_DEVICE__
602 __half2 __highs2half2(__half2 x, __half2 y)
603 {
604 return __half2{
605 _Float16_2{
606 static_cast<__half2_raw>(x).data.y,
607 static_cast<__half2_raw>(y).data.y}};
608 }
609
610 inline
611 __HOST_DEVICE__
612 __half2 __lowhigh2highlow(__half2 x)
613 {
614 return __half2{
615 _Float16_2{
616 static_cast<__half2_raw>(x).data.y,
617 static_cast<__half2_raw>(x).data.x}};
618 }
619
620 // Bitcasts
621 inline
622 __device__
623 short __half_as_short(__half x)
624 {
625 return static_cast<__half_raw>(x).x;
626 }
627
628 inline
629 __device__
630 unsigned short __half_as_ushort(__half x)
631 {
632 return static_cast<__half_raw>(x).x;
633 }
634
635 inline
636 __device__
637 __half __short_as_half(short x)
638 {
639 __half_raw r; r.x = x;
640 return r;
641 }
642
643 inline
644 __device__
645 __half __ushort_as_half(unsigned short x)
646 {
647 __half_raw r; r.x = x;
648 return r;
649 }
650
651 // float -> half | half2
652 inline
653 __HOST_DEVICE__
654 __half __float2half(float x)
655 {
656 return __half_raw{static_cast<_Float16>(x)};
657 }
658 inline
659 __HOST_DEVICE__
660 __half __float2half_rn(float x)
661 {
662 return __half_raw{static_cast<_Float16>(x)};
663 }
664 #if !defined(__HIPCC_RTC__)
665 // TODO: rounding behaviour is not correct for host functions.
666 inline
668 __half __float2half_rz(float x)
669 {
670 return __half_raw{static_cast<_Float16>(x)};
671 }
672 inline
674 __half __float2half_rd(float x)
675 {
676 return __half_raw{static_cast<_Float16>(x)};
677 }
678 inline
680 __half __float2half_ru(float x)
681 {
682 return __half_raw{static_cast<_Float16>(x)};
683 }
684 #endif
685 inline
686 __device__
687 __half __float2half_rz(float x)
688 {
689 return __half_raw{__ocml_cvtrtz_f16_f32(x)};
690 }
691 inline
692 __device__
693 __half __float2half_rd(float x)
694 {
695 return __half_raw{__ocml_cvtrtn_f16_f32(x)};
696 }
697 inline
698 __device__
699 __half __float2half_ru(float x)
700 {
701 return __half_raw{__ocml_cvtrtp_f16_f32(x)};
702 }
703 inline
704 __HOST_DEVICE__
705 __half2 __float2half2_rn(float x)
706 {
707 return __half2{
708 _Float16_2{
709 static_cast<_Float16>(x), static_cast<_Float16>(x)}};
710 }
711 inline
712 __HOST_DEVICE__
713 __half2 __floats2half2_rn(float x, float y)
714 {
715 return __half2{_Float16_2{
716 static_cast<_Float16>(x), static_cast<_Float16>(y)}};
717 }
718 inline
719 __HOST_DEVICE__
720 __half2 __float22half2_rn(float2 x)
721 {
722 return __floats2half2_rn(x.x, x.y);
723 }
724
725 // half | half2 -> float
726 inline
727 __HOST_DEVICE__
728 float __half2float(__half x)
729 {
730 return static_cast<__half_raw>(x).data;
731 }
732 inline
733 __HOST_DEVICE__
734 float __low2float(__half2 x)
735 {
736 return static_cast<__half2_raw>(x).data.x;
737 }
738 inline
739 __HOST_DEVICE__
740 float __high2float(__half2 x)
741 {
742 return static_cast<__half2_raw>(x).data.y;
743 }
744 inline
745 __HOST_DEVICE__
746 float2 __half22float2(__half2 x)
747 {
748 return make_float2(
749 static_cast<__half2_raw>(x).data.x,
750 static_cast<__half2_raw>(x).data.y);
751 }
752
753 // half -> int
754 inline
755 __device__
756 int __half2int_rn(__half x)
757 {
758 return static_cast<__half_raw>(x).data;
759 }
760 inline
761 __device__
762 int __half2int_rz(__half x)
763 {
764 return static_cast<__half_raw>(x).data;
765 }
766 inline
767 __device__
768 int __half2int_rd(__half x)
769 {
770 return static_cast<__half_raw>(x).data;
771 }
772 inline
773 __device__
774 int __half2int_ru(__half x)
775 {
776 return static_cast<__half_raw>(x).data;
777 }
778
779 // int -> half
780 inline
781 __device__
782 __half __int2half_rn(int x)
783 {
784 return __half_raw{static_cast<_Float16>(x)};
785 }
786 inline
787 __device__
788 __half __int2half_rz(int x)
789 {
790 return __half_raw{static_cast<_Float16>(x)};
791 }
792 inline
793 __device__
794 __half __int2half_rd(int x)
795 {
796 return __half_raw{static_cast<_Float16>(x)};
797 }
798 inline
799 __device__
800 __half __int2half_ru(int x)
801 {
802 return __half_raw{static_cast<_Float16>(x)};
803 }
804
805 // half -> short
806 inline
807 __device__
808 short __half2short_rn(__half x)
809 {
810 return static_cast<__half_raw>(x).data;
811 }
812 inline
813 __device__
814 short __half2short_rz(__half x)
815 {
816 return static_cast<__half_raw>(x).data;
817 }
818 inline
819 __device__
820 short __half2short_rd(__half x)
821 {
822 return static_cast<__half_raw>(x).data;
823 }
824 inline
825 __device__
826 short __half2short_ru(__half x)
827 {
828 return static_cast<__half_raw>(x).data;
829 }
830
831 // short -> half
832 inline
833 __device__
834 __half __short2half_rn(short x)
835 {
836 return __half_raw{static_cast<_Float16>(x)};
837 }
838 inline
839 __device__
840 __half __short2half_rz(short x)
841 {
842 return __half_raw{static_cast<_Float16>(x)};
843 }
844 inline
845 __device__
846 __half __short2half_rd(short x)
847 {
848 return __half_raw{static_cast<_Float16>(x)};
849 }
850 inline
851 __device__
852 __half __short2half_ru(short x)
853 {
854 return __half_raw{static_cast<_Float16>(x)};
855 }
856
857 // half -> long long
858 inline
859 __device__
860 long long __half2ll_rn(__half x)
861 {
862 return static_cast<__half_raw>(x).data;
863 }
864 inline
865 __device__
866 long long __half2ll_rz(__half x)
867 {
868 return static_cast<__half_raw>(x).data;
869 }
870 inline
871 __device__
872 long long __half2ll_rd(__half x)
873 {
874 return static_cast<__half_raw>(x).data;
875 }
876 inline
877 __device__
878 long long __half2ll_ru(__half x)
879 {
880 return static_cast<__half_raw>(x).data;
881 }
882
883 // long long -> half
884 inline
885 __device__
886 __half __ll2half_rn(long long x)
887 {
888 return __half_raw{static_cast<_Float16>(x)};
889 }
890 inline
891 __device__
892 __half __ll2half_rz(long long x)
893 {
894 return __half_raw{static_cast<_Float16>(x)};
895 }
896 inline
897 __device__
898 __half __ll2half_rd(long long x)
899 {
900 return __half_raw{static_cast<_Float16>(x)};
901 }
902 inline
903 __device__
904 __half __ll2half_ru(long long x)
905 {
906 return __half_raw{static_cast<_Float16>(x)};
907 }
908
909 // half -> unsigned int
910 inline
911 __device__
912 unsigned int __half2uint_rn(__half x)
913 {
914 return static_cast<__half_raw>(x).data;
915 }
916 inline
917 __device__
918 unsigned int __half2uint_rz(__half x)
919 {
920 return static_cast<__half_raw>(x).data;
921 }
922 inline
923 __device__
924 unsigned int __half2uint_rd(__half x)
925 {
926 return static_cast<__half_raw>(x).data;
927 }
928 inline
929 __device__
930 unsigned int __half2uint_ru(__half x)
931 {
932 return static_cast<__half_raw>(x).data;
933 }
934
935 // unsigned int -> half
936 inline
937 __device__
938 __half __uint2half_rn(unsigned int x)
939 {
940 return __half_raw{static_cast<_Float16>(x)};
941 }
942 inline
943 __device__
944 __half __uint2half_rz(unsigned int x)
945 {
946 return __half_raw{static_cast<_Float16>(x)};
947 }
948 inline
949 __device__
950 __half __uint2half_rd(unsigned int x)
951 {
952 return __half_raw{static_cast<_Float16>(x)};
953 }
954 inline
955 __device__
956 __half __uint2half_ru(unsigned int x)
957 {
958 return __half_raw{static_cast<_Float16>(x)};
959 }
960
961 // half -> unsigned short
962 inline
963 __device__
964 unsigned short __half2ushort_rn(__half x)
965 {
966 return static_cast<__half_raw>(x).data;
967 }
968 inline
969 __device__
970 unsigned short __half2ushort_rz(__half x)
971 {
972 return static_cast<__half_raw>(x).data;
973 }
974 inline
975 __device__
976 unsigned short __half2ushort_rd(__half x)
977 {
978 return static_cast<__half_raw>(x).data;
979 }
980 inline
981 __device__
982 unsigned short __half2ushort_ru(__half x)
983 {
984 return static_cast<__half_raw>(x).data;
985 }
986
987 // unsigned short -> half
988 inline
989 __device__
990 __half __ushort2half_rn(unsigned short x)
991 {
992 return __half_raw{static_cast<_Float16>(x)};
993 }
994 inline
995 __device__
996 __half __ushort2half_rz(unsigned short x)
997 {
998 return __half_raw{static_cast<_Float16>(x)};
999 }
1000 inline
1001 __device__
1002 __half __ushort2half_rd(unsigned short x)
1003 {
1004 return __half_raw{static_cast<_Float16>(x)};
1005 }
1006 inline
1007 __device__
1008 __half __ushort2half_ru(unsigned short x)
1009 {
1010 return __half_raw{static_cast<_Float16>(x)};
1011 }
1012
1013 // half -> unsigned long long
1014 inline
1015 __device__
1016 unsigned long long __half2ull_rn(__half x)
1017 {
1018 return static_cast<__half_raw>(x).data;
1019 }
1020 inline
1021 __device__
1022 unsigned long long __half2ull_rz(__half x)
1023 {
1024 return static_cast<__half_raw>(x).data;
1025 }
1026 inline
1027 __device__
1028 unsigned long long __half2ull_rd(__half x)
1029 {
1030 return static_cast<__half_raw>(x).data;
1031 }
1032 inline
1033 __device__
1034 unsigned long long __half2ull_ru(__half x)
1035 {
1036 return static_cast<__half_raw>(x).data;
1037 }
1038
1039 // unsigned long long -> half
1040 inline
1041 __device__
1042 __half __ull2half_rn(unsigned long long x)
1043 {
1044 return __half_raw{static_cast<_Float16>(x)};
1045 }
1046 inline
1047 __device__
1048 __half __ull2half_rz(unsigned long long x)
1049 {
1050 return __half_raw{static_cast<_Float16>(x)};
1051 }
1052 inline
1053 __device__
1054 __half __ull2half_rd(unsigned long long x)
1055 {
1056 return __half_raw{static_cast<_Float16>(x)};
1057 }
1058 inline
1059 __device__
1060 __half __ull2half_ru(unsigned long long x)
1061 {
1062 return __half_raw{static_cast<_Float16>(x)};
1063 }
1064
1065 // Load primitives
1066 inline
1067 __device__
1068 __half __ldg(const __half* ptr) { return *ptr; }
1069 inline
1070 __device__
1071 __half __ldcg(const __half* ptr) { return *ptr; }
1072 inline
1073 __device__
1074 __half __ldca(const __half* ptr) { return *ptr; }
1075 inline
1076 __device__
1077 __half __ldcs(const __half* ptr) { return *ptr; }
1078
1079 inline
1080 __HOST_DEVICE__
1081 __half2 __ldg(const __half2* ptr) { return *ptr; }
1082 inline
1083 __HOST_DEVICE__
1084 __half2 __ldcg(const __half2* ptr) { return *ptr; }
1085 inline
1086 __HOST_DEVICE__
1087 __half2 __ldca(const __half2* ptr) { return *ptr; }
1088 inline
1089 __HOST_DEVICE__
1090 __half2 __ldcs(const __half2* ptr) { return *ptr; }
1091
1092 // Relations
1093 inline
1094 __device__
1095 bool __heq(__half x, __half y)
1096 {
1097 return static_cast<__half_raw>(x).data ==
1098 static_cast<__half_raw>(y).data;
1099 }
1100 inline
1101 __device__
1102 bool __hne(__half x, __half y)
1103 {
1104 return static_cast<__half_raw>(x).data !=
1105 static_cast<__half_raw>(y).data;
1106 }
1107 inline
1108 __device__
1109 bool __hle(__half x, __half y)
1110 {
1111 return static_cast<__half_raw>(x).data <=
1112 static_cast<__half_raw>(y).data;
1113 }
1114 inline
1115 __device__
1116 bool __hge(__half x, __half y)
1117 {
1118 return static_cast<__half_raw>(x).data >=
1119 static_cast<__half_raw>(y).data;
1120 }
1121 inline
1122 __device__
1123 bool __hlt(__half x, __half y)
1124 {
1125 return static_cast<__half_raw>(x).data <
1126 static_cast<__half_raw>(y).data;
1127 }
1128 inline
1129 __device__
1130 bool __hgt(__half x, __half y)
1131 {
1132 return static_cast<__half_raw>(x).data >
1133 static_cast<__half_raw>(y).data;
1134 }
1135 inline __device__
1136 bool __hequ(__half x, __half y) {
1137 return !(static_cast<__half_raw>(x).data < static_cast<__half_raw>(y).data) &&
1138 !(static_cast<__half_raw>(x).data > static_cast<__half_raw>(y).data);
1139 }
1140 inline __device__
1141 bool __hneu(__half x, __half y) {
1142 return !(static_cast<__half_raw>(x).data == static_cast<__half_raw>(y).data);
1143 }
1144 inline __device__
1145 bool __hleu(__half x, __half y) {
1146 return !(static_cast<__half_raw>(x).data > static_cast<__half_raw>(y).data);
1147 }
1148 inline
1149 __device__
1150 bool __hgeu(__half x, __half y) {
1151 return !(static_cast<__half_raw>(x).data < static_cast<__half_raw>(y).data);
1152 }
1153 inline
1154 __device__
1155 bool __hltu(__half x, __half y) {
1156 return !(static_cast<__half_raw>(x).data >= static_cast<__half_raw>(y).data);
1157 }
1158 inline
1159 __device__
1160 bool __hgtu(__half x, __half y) {
1161 return !(static_cast<__half_raw>(x).data <= static_cast<__half_raw>(y).data);
1162 }
1163
1164 inline
1165 __HOST_DEVICE__
1166 __half2 __heq2(__half2 x, __half2 y)
1167 {
1168 auto r = static_cast<__half2_raw>(x).data ==
1169 static_cast<__half2_raw>(y).data;
1170 return __builtin_convertvector(-r, _Float16_2);
1171 }
1172 inline
1173 __HOST_DEVICE__
1174 __half2 __hne2(__half2 x, __half2 y)
1175 {
1176 auto r = static_cast<__half2_raw>(x).data !=
1177 static_cast<__half2_raw>(y).data;
1178 return __builtin_convertvector(-r, _Float16_2);
1179 }
1180 inline
1181 __HOST_DEVICE__
1182 __half2 __hle2(__half2 x, __half2 y)
1183 {
1184 auto r = static_cast<__half2_raw>(x).data <=
1185 static_cast<__half2_raw>(y).data;
1186 return __builtin_convertvector(-r, _Float16_2);
1187 }
1188 inline
1189 __HOST_DEVICE__
1190 __half2 __hge2(__half2 x, __half2 y)
1191 {
1192 auto r = static_cast<__half2_raw>(x).data >=
1193 static_cast<__half2_raw>(y).data;
1194 return __builtin_convertvector(-r, _Float16_2);
1195 }
1196 inline
1197 __HOST_DEVICE__
1198 __half2 __hlt2(__half2 x, __half2 y)
1199 {
1200 auto r = static_cast<__half2_raw>(x).data <
1201 static_cast<__half2_raw>(y).data;
1202 return __builtin_convertvector(-r, _Float16_2);
1203 }
1204 inline
1205 __HOST_DEVICE__
1206 __half2 __hgt2(__half2 x, __half2 y)
1207 {
1208 auto r = static_cast<__half2_raw>(x).data >
1209 static_cast<__half2_raw>(y).data;
1210 return __builtin_convertvector(-r, _Float16_2);
1211 }
1212 inline __HOST_DEVICE__
1213 __half2 __hequ2(__half2 x, __half2 y) {
1214 auto r = !(static_cast<__half2_raw>(x).data < static_cast<__half2_raw>(y).data) &&
1215 !(static_cast<__half2_raw>(x).data > static_cast<__half2_raw>(y).data);
1216 return __builtin_convertvector(-r, _Float16_2);
1217 }
1218 inline
1219 __HOST_DEVICE__
1220 __half2 __hneu2(__half2 x, __half2 y) {
1221 auto r = !(static_cast<__half2_raw>(x).data == static_cast<__half2_raw>(y).data);
1222 return __builtin_convertvector(-r, _Float16_2);
1223 }
1224 inline
1225 __HOST_DEVICE__
1226 __half2 __hleu2(__half2 x, __half2 y) {
1227 auto r = !(static_cast<__half2_raw>(x).data > static_cast<__half2_raw>(y).data);
1228 return __builtin_convertvector(-r, _Float16_2);
1229 }
1230 inline
1231 __HOST_DEVICE__
1232 __half2 __hgeu2(__half2 x, __half2 y) {
1233 auto r = !(static_cast<__half2_raw>(x).data < static_cast<__half2_raw>(y).data);
1234 return __builtin_convertvector(-r, _Float16_2);
1235 }
1236 inline
1237 __HOST_DEVICE__
1238 __half2 __hltu2(__half2 x, __half2 y) {
1239 auto r = !(static_cast<__half2_raw>(x).data >= static_cast<__half2_raw>(y).data);
1240 return __builtin_convertvector(-r, _Float16_2);
1241 }
1242 inline
1243 __HOST_DEVICE__
1244 __half2 __hgtu2(__half2 x, __half2 y) {
1245 auto r = !(static_cast<__half2_raw>(x).data <= static_cast<__half2_raw>(y).data);
1246 return __builtin_convertvector(-r, _Float16_2);
1247 }
1248
1249 inline
1250 __HOST_DEVICE__
1251 bool __hbeq2(__half2 x, __half2 y)
1252 {
1253 auto r = static_cast<__half2_raw>(__heq2(x, y));
1254 return r.data.x != 0 && r.data.y != 0;
1255 }
1256 inline
1257 __HOST_DEVICE__
1258 bool __hbne2(__half2 x, __half2 y)
1259 {
1260 auto r = static_cast<__half2_raw>(__hne2(x, y));
1261 return r.data.x != 0 && r.data.y != 0;
1262 }
1263 inline
1264 __HOST_DEVICE__
1265 bool __hble2(__half2 x, __half2 y)
1266 {
1267 auto r = static_cast<__half2_raw>(__hle2(x, y));
1268 return r.data.x != 0 && r.data.y != 0;
1269 }
1270 inline
1271 __HOST_DEVICE__
1272 bool __hbge2(__half2 x, __half2 y)
1273 {
1274 auto r = static_cast<__half2_raw>(__hge2(x, y));
1275 return r.data.x != 0 && r.data.y != 0;
1276 }
1277 inline
1278 __HOST_DEVICE__
1279 bool __hblt2(__half2 x, __half2 y)
1280 {
1281 auto r = static_cast<__half2_raw>(__hlt2(x, y));
1282 return r.data.x != 0 && r.data.y != 0;
1283 }
1284 inline
1285 __HOST_DEVICE__
1286 bool __hbgt2(__half2 x, __half2 y)
1287 {
1288 auto r = static_cast<__half2_raw>(__hgt2(x, y));
1289 return r.data.x != 0 && r.data.y != 0;
1290 }
1291 inline
1292 __HOST_DEVICE__
1293 bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); }
1294 inline
1295 __HOST_DEVICE__
1296 bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); }
1297 inline
1298 __HOST_DEVICE__
1299 bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); }
1300 inline
1301 __HOST_DEVICE__
1302 bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); }
1303 inline
1304 __HOST_DEVICE__
1305 bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); }
1306 inline
1307 __HOST_DEVICE__
1308 bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); }
1309 inline
1310 __device__
1311 __half __hmax(const __half x, const __half y) {
1312 return __half_raw{__ocml_fmax_f16(static_cast<__half_raw>(x).data,
1313 static_cast<__half_raw>(y).data)};
1314 }
1315 inline
1316 __device__
1317 __half __hmax_nan(const __half x, const __half y) {
1318 if(__ocml_isnan_f16(static_cast<__half_raw>(x).data)) {
1319 return x;
1320 } else if (__ocml_isnan_f16(static_cast<__half_raw>(y).data)) {
1321 return y;
1322 }
1323 return __hmax(x, y);
1324 }
1325 inline
1326 __device__
1327 __half __hmin(const __half x, const __half y) {
1328 return __half_raw{__ocml_fmin_f16(static_cast<__half_raw>(x).data,
1329 static_cast<__half_raw>(y).data)};
1330 }
1331 inline
1332 __device__
1333 __half __hmin_nan(const __half x, const __half y) {
1334 if(__ocml_isnan_f16(static_cast<__half_raw>(x).data)) {
1335 return x;
1336 } else if (__ocml_isnan_f16(static_cast<__half_raw>(y).data)) {
1337 return y;
1338 }
1339 return __hmin(x, y);
1340 }
1341
1342 // Arithmetic
1343 inline
1344 __device__
1345 __half __clamp_01(__half x)
1346 {
1347 auto r = static_cast<__half_raw>(x);
1348
1349 if (__hlt(x, __half_raw{0})) return __half_raw{0};
1350 if (__hlt(__half_raw{1}, x)) return __half_raw{1};
1351 return r;
1352 }
1353
1354 inline
1355 __device__
1356 __half __hadd(__half x, __half y)
1357 {
1358 return __half_raw{
1359 static_cast<__half_raw>(x).data +
1360 static_cast<__half_raw>(y).data};
1361 }
1362 inline
1363 __device__
1364 __half __habs(__half x)
1365 {
1366 return __half_raw{
1367 __ocml_fabs_f16(static_cast<__half_raw>(x).data)};
1368 }
1369 inline
1370 __device__
1371 __half __hsub(__half x, __half y)
1372 {
1373 return __half_raw{
1374 static_cast<__half_raw>(x).data -
1375 static_cast<__half_raw>(y).data};
1376 }
1377 inline
1378 __device__
1379 __half __hmul(__half x, __half y)
1380 {
1381 return __half_raw{
1382 static_cast<__half_raw>(x).data *
1383 static_cast<__half_raw>(y).data};
1384 }
1385 inline
1386 __device__
1387 __half __hadd_sat(__half x, __half y)
1388 {
1389 return __clamp_01(__hadd(x, y));
1390 }
1391 inline
1392 __device__
1393 __half __hsub_sat(__half x, __half y)
1394 {
1395 return __clamp_01(__hsub(x, y));
1396 }
1397 inline
1398 __device__
1399 __half __hmul_sat(__half x, __half y)
1400 {
1401 return __clamp_01(__hmul(x, y));
1402 }
1403 inline
1404 __device__
1405 __half __hfma(__half x, __half y, __half z)
1406 {
1407 return __half_raw{__ocml_fma_f16(
1408 static_cast<__half_raw>(x).data,
1409 static_cast<__half_raw>(y).data,
1410 static_cast<__half_raw>(z).data)};
1411 }
1412 inline
1413 __device__
1414 __half __hfma_sat(__half x, __half y, __half z)
1415 {
1416 return __clamp_01(__hfma(x, y, z));
1417 }
1418 inline
1419 __device__
1420 __half __hdiv(__half x, __half y)
1421 {
1422 return __half_raw{
1423 static_cast<__half_raw>(x).data /
1424 static_cast<__half_raw>(y).data};
1425 }
1426
1427 inline
1428 __HOST_DEVICE__
1429 __half2 __hadd2(__half2 x, __half2 y)
1430 {
1431 return __half2{
1432 static_cast<__half2_raw>(x).data +
1433 static_cast<__half2_raw>(y).data};
1434 }
1435 inline
1436 __HOST_DEVICE__
1437 __half2 __habs2(__half2 x)
1438 {
1439 return __half2{
1440 __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)};
1441 }
1442 inline
1443 __HOST_DEVICE__
1444 __half2 __hsub2(__half2 x, __half2 y)
1445 {
1446 return __half2{
1447 static_cast<__half2_raw>(x).data -
1448 static_cast<__half2_raw>(y).data};
1449 }
1450 inline
1451 __HOST_DEVICE__
1452 __half2 __hmul2(__half2 x, __half2 y)
1453 {
1454 return __half2{
1455 static_cast<__half2_raw>(x).data *
1456 static_cast<__half2_raw>(y).data};
1457 }
1458 inline
1459 __HOST_DEVICE__
1460 __half2 __hadd2_sat(__half2 x, __half2 y)
1461 {
1462 auto r = static_cast<__half2_raw>(__hadd2(x, y));
1463 return __half2{
1464 __clamp_01(__half_raw{r.data.x}),
1465 __clamp_01(__half_raw{r.data.y})};
1466 }
1467 inline
1468 __HOST_DEVICE__
1469 __half2 __hsub2_sat(__half2 x, __half2 y)
1470 {
1471 auto r = static_cast<__half2_raw>(__hsub2(x, y));
1472 return __half2{
1473 __clamp_01(__half_raw{r.data.x}),
1474 __clamp_01(__half_raw{r.data.y})};
1475 }
1476 inline
1477 __HOST_DEVICE__
1478 __half2 __hmul2_sat(__half2 x, __half2 y)
1479 {
1480 auto r = static_cast<__half2_raw>(__hmul2(x, y));
1481 return __half2{
1482 __clamp_01(__half_raw{r.data.x}),
1483 __clamp_01(__half_raw{r.data.y})};
1484 }
1485 inline
1486 __HOST_DEVICE__
1487 __half2 __hfma2(__half2 x, __half2 y, __half2 z)
1488 {
1489 return __half2{__ocml_fma_2f16(x, y, z)};
1490 }
1491 inline
1492 __HOST_DEVICE__
1493 __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
1494 {
1495 auto r = static_cast<__half2_raw>(__hfma2(x, y, z));
1496 return __half2{
1497 __clamp_01(__half_raw{r.data.x}),
1498 __clamp_01(__half_raw{r.data.y})};
1499 }
1500 inline
1501 __HOST_DEVICE__
1502 __half2 __h2div(__half2 x, __half2 y)
1503 {
1504 return __half2{
1505 static_cast<__half2_raw>(x).data /
1506 static_cast<__half2_raw>(y).data};
1507 }
1508
1509 // Atomic
1510 #if defined(__clang__) && defined(__HIP__)
1511 inline __device__ __half2 unsafeAtomicAdd(__half2* address, __half2 value) {
1512 #if __has_builtin(__builtin_amdgcn_flat_atomic_fadd_v2f16)
1513 // The api expects an ext_vector_type of half
1514 typedef _Float16 __attribute__((ext_vector_type(2))) vec_fp162;
1515 static_assert(sizeof(vec_fp162) == sizeof(__half2_raw));
1516 union {
1517 __half2_raw h2r;
1518 vec_fp162 fp16;
1519 } u {static_cast<__half2_raw>(value)};
1520 u.fp16 =
1521 __builtin_amdgcn_flat_atomic_fadd_v2f16((vec_fp162*)address, u.fp16);
1522 return static_cast<__half2>(u.h2r);
1523 #else
1524 static_assert(sizeof(__half2_raw) == sizeof(unsigned int));
1525 union u_hold {
1526 __half2_raw h2r;
1527 unsigned int u32;
1528 };
1529 u_hold old_val, new_val;
1530 old_val.u32 = __hip_atomic_load((unsigned int*)address, __ATOMIC_RELAXED,
1531 __HIP_MEMORY_SCOPE_AGENT);
1532 do {
1533 new_val.h2r = __hadd2(old_val.h2r, value);
1534 } while (!__hip_atomic_compare_exchange_strong(
1535 (unsigned int*)address, &old_val.u32, new_val.u32, __ATOMIC_RELAXED,
1536 __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT));
1537 return old_val.h2r;
1538 #endif
1539 }
1540 #endif // defined(__clang__) && defined(__HIP__)
1541
1542 // Math functions
1543 #if defined(__clang__) && defined(__HIP__)
1544 inline
1545 __device__
1546 float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) {
1547 return __ockl_fdot2(static_cast<__half2_raw>(a).data,
1548 static_cast<__half2_raw>(b).data,
1549 c, saturate);
1550 }
1551 #endif
1552 inline
1553 __device__
1554 __half htrunc(__half x)
1555 {
1556 return __half_raw{
1557 __ocml_trunc_f16(static_cast<__half_raw>(x).data)};
1558 }
1559 inline
1560 __device__
1561 __half hceil(__half x)
1562 {
1563 return __half_raw{
1564 __ocml_ceil_f16(static_cast<__half_raw>(x).data)};
1565 }
1566 inline
1567 __device__
1568 __half hfloor(__half x)
1569 {
1570 return __half_raw{
1571 __ocml_floor_f16(static_cast<__half_raw>(x).data)};
1572 }
1573 inline
1574 __device__
1575 __half hrint(__half x)
1576 {
1577 return __half_raw{
1578 __ocml_rint_f16(static_cast<__half_raw>(x).data)};
1579 }
1580 inline
1581 __device__
1582 __half hsin(__half x)
1583 {
1584 return __half_raw{
1585 __ocml_sin_f16(static_cast<__half_raw>(x).data)};
1586 }
1587 inline
1588 __device__
1589 __half hcos(__half x)
1590 {
1591 return __half_raw{
1592 __ocml_cos_f16(static_cast<__half_raw>(x).data)};
1593 }
1594 inline
1595 __device__
1596 __half hexp(__half x)
1597 {
1598 return __half_raw{
1599 __ocml_exp_f16(static_cast<__half_raw>(x).data)};
1600 }
1601 inline
1602 __device__
1603 __half hexp2(__half x)
1604 {
1605 return __half_raw{
1606 __ocml_exp2_f16(static_cast<__half_raw>(x).data)};
1607 }
1608 inline
1609 __device__
1610 __half hexp10(__half x)
1611 {
1612 return __half_raw{
1613 __ocml_exp10_f16(static_cast<__half_raw>(x).data)};
1614 }
1615 inline
1616 __device__
1617 __half hlog2(__half x)
1618 {
1619 return __half_raw{
1620 __ocml_log2_f16(static_cast<__half_raw>(x).data)};
1621 }
1622 inline
1623 __device__
1624 __half hlog(__half x)
1625 {
1626 return __half_raw{
1627 __ocml_log_f16(static_cast<__half_raw>(x).data)};
1628 }
1629 inline
1630 __device__
1631 __half hlog10(__half x)
1632 {
1633 return __half_raw{
1634 __ocml_log10_f16(static_cast<__half_raw>(x).data)};
1635 }
1636 inline
1637 __device__
1638 __half hrcp(__half x)
1639 {
1640 return __half_raw{
1641 static_cast<_Float16>(1.0f) /static_cast<__half_raw>(x).data};
1642 }
1643 inline
1644 __device__
1645 __half hrsqrt(__half x)
1646 {
1647 return __half_raw{
1648 __ocml_rsqrt_f16(static_cast<__half_raw>(x).data)};
1649 }
1650 inline
1651 __device__
1652 __half hsqrt(__half x)
1653 {
1654 return __half_raw{
1655 __ocml_sqrt_f16(static_cast<__half_raw>(x).data)};
1656 }
1657 inline
1658 __device__
1659 bool __hisinf(__half x)
1660 {
1661 return __ocml_isinf_f16(static_cast<__half_raw>(x).data);
1662 }
1663 inline
1664 __device__
1665 bool __hisnan(__half x)
1666 {
1667 return __ocml_isnan_f16(static_cast<__half_raw>(x).data);
1668 }
1669 inline
1670 __device__
1671 __half __hneg(__half x)
1672 {
1673 return __half_raw{-static_cast<__half_raw>(x).data};
1674 }
1675
1676 inline
1677 __HOST_DEVICE__
1678 __half2 h2trunc(__half2 x)
1679 {
1680 return __half2{__ocml_trunc_2f16(x)};
1681 }
1682 inline
1683 __HOST_DEVICE__
1684 __half2 h2ceil(__half2 x)
1685 {
1686 return __half2{__ocml_ceil_2f16(x)};
1687 }
1688 inline
1689 __HOST_DEVICE__
1690 __half2 h2floor(__half2 x)
1691 {
1692 return __half2{__ocml_floor_2f16(x)};
1693 }
1694 inline
1695 __HOST_DEVICE__
1696 __half2 h2rint(__half2 x)
1697 {
1698 return __half2{__ocml_rint_2f16(x)};
1699 }
1700 inline
1701 __HOST_DEVICE__
1702 __half2 h2sin(__half2 x)
1703 {
1704 return __half2{__ocml_sin_2f16(x)};
1705 }
1706 inline
1707 __HOST_DEVICE__
1708 __half2 h2cos(__half2 x)
1709 {
1710 return __half2{__ocml_cos_2f16(x)};
1711 }
1712 inline
1713 __HOST_DEVICE__
1714 __half2 h2exp(__half2 x)
1715 {
1716 return __half2{__ocml_exp_2f16(x)};
1717 }
1718 inline
1719 __HOST_DEVICE__
1720 __half2 h2exp2(__half2 x)
1721 {
1722 return __half2{__ocml_exp2_2f16(x)};
1723 }
1724 inline
1725 __HOST_DEVICE__
1726 __half2 h2exp10(__half2 x)
1727 {
1728 return __half2{__ocml_exp10_2f16(x)};
1729 }
1730 inline
1731 __HOST_DEVICE__
1732 __half2 h2log2(__half2 x)
1733 {
1734 return __half2{__ocml_log2_2f16(x)};
1735 }
1736 inline
1737 __HOST_DEVICE__
1738 __half2 h2log(__half2 x) { return __ocml_log_2f16(x); }
1739 inline
1740 __HOST_DEVICE__
1741 __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); }
1742 inline
1743 __HOST_DEVICE__
1744 __half2 h2rcp(__half2 x) {
1745 return _Float16_2{
1746 _Float16_2{static_cast<_Float16>(1.0f), static_cast<_Float16>(1.0f)} / x.data};
1747 }
1748 inline
1749 __HOST_DEVICE__
1750 __half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); }
1751 inline
1752 __HOST_DEVICE__
1753 __half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); }
1754 inline
1755 __HOST_DEVICE__
1756 __half2 __hisinf2(__half2 x)
1757 {
1758 auto r = __ocml_isinf_2f16(x);
1759 return __half2{_Float16_2{
1760 static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
1761 }
1762 inline
1763 __HOST_DEVICE__
1764 __half2 __hisnan2(__half2 x)
1765 {
1766 auto r = __ocml_isnan_2f16(x);
1767 return __half2{_Float16_2{
1768 static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
1769 }
1770 inline
1771 __HOST_DEVICE__
1772 __half2 __hneg2(__half2 x)
1773 {
1774 return __half2{-static_cast<__half2_raw>(x).data};
1775 }
1776 } // Anonymous namespace.
1777
1778 #if !defined(HIP_NO_HALF)
1779 using half = __half;
1780 using half2 = __half2;
1781 #endif
1782 __device__
1783 inline
1784 __half __shfl(__half var, int src_lane, int width = warpSize) {
1785 union { int i; __half h; } tmp; tmp.h = var;
1786 tmp.i = __shfl(tmp.i, src_lane, width);
1787 return tmp.h;
1788 }
1789 __device__
1790 inline
1791 __half2 __shfl(__half2 var, int src_lane, int width = warpSize) {
1792 union { int i; __half2 h; } tmp; tmp.h = var;
1793 tmp.i = __shfl(tmp.i, src_lane, width);
1794 return tmp.h;
1795 }
1796 __device__
1797 inline
1798 __half __shfl_up(__half var, unsigned int lane_delta, int width = warpSize) {
1799 union { int i; __half h; } tmp; tmp.h = var;
1800 tmp.i = __shfl_up(tmp.i, lane_delta, width);
1801 return tmp.h;
1802 }
1803 __device__
1804 inline
1805 __half2 __shfl_up(__half2 var, unsigned int lane_delta, int width = warpSize) {
1806 union { int i; __half2 h; } tmp; tmp.h = var;
1807 tmp.i = __shfl_up(tmp.i, lane_delta, width);
1808 return tmp.h;
1809 }
1810 __device__
1811 inline
1812 __half __shfl_down(__half var, unsigned int lane_delta, int width = warpSize) {
1813 union { int i; __half h; } tmp; tmp.h = var;
1814 tmp.i = __shfl_down(tmp.i, lane_delta, width);
1815 return tmp.h;
1816 }
1817 __device__
1818 inline
1819 __half2 __shfl_down(__half2 var, unsigned int lane_delta, int width = warpSize) {
1820 union { int i; __half2 h; } tmp; tmp.h = var;
1821 tmp.i = __shfl_down(tmp.i, lane_delta, width);
1822 return tmp.h;
1823 }
1824 __device__
1825 inline
1826 __half __shfl_xor(__half var, int lane_mask, int width = warpSize) {
1827 union { int i; __half h; } tmp; tmp.h = var;
1828 tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1829 return tmp.h;
1830 }
1831 __device__
1832 inline
1833 __half2 __shfl_xor(__half2 var, int lane_mask, int width = warpSize) {
1834 union { int i; __half2 h; } tmp; tmp.h = var;
1835 tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1836 return tmp.h;
1837 }
1838 #endif // defined(__cplusplus)
1839#elif defined(__GNUC__) || defined(_MSC_VER)
1840 #if !defined(__HIPCC_RTC__)
1841 #include "hip_fp16_gcc.h"
1842 #endif
1843#endif // !defined(__clang__) && defined(__GNUC__)
1844
1845#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
#define __host__
Definition host_defines.h:170
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:805
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator-=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to subtract-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1016
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16 &l)
Operator to unary+ on a __hip_bfloat16 number.
Definition amd_hip_bf16.h:939
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator/(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1025
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16 &l)
Operator to negate a __hip_bfloat16 number.
Definition amd_hip_bf16.h:954
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:839
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:797
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator/=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1034
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator*(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:921
__BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:821
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator*=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:930
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:831
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator++(__hip_bfloat16 &l, const int)
Operator to post increment a __hip_bfloat16 number.
Definition amd_hip_bf16.h:969
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator+=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to add-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1007
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition amd_hip_bf16.h:849
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator--(__hip_bfloat16 &l, const int)
Operator to post decrement a __hip_bfloat16 number.
Definition amd_hip_bf16.h:988
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:813
__BF16_HOST_DEVICE_STATIC__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values.
Definition amd_hip_bf16.h:1171
__BF16_HOST_DEVICE_STATIC__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than.
Definition amd_hip_bf16.h:1188
__BF16_HOST_DEVICE_STATIC__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - not equal.
Definition amd_hip_bf16.h:1220
__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform an equal compare on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1493
__BF16_HOST_DEVICE_STATIC__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than equal.
Definition amd_hip_bf16.h:1276
__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a not equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1501
__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1525
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return min.
Definition amd_hip_bf16.h:1248
__BF16_HOST_DEVICE_STATIC__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than equal.
Definition amd_hip_bf16.h:1204
__BF16_HOST_DEVICE_STATIC__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than equal.
Definition amd_hip_bf16.h:1284
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return max.
Definition amd_hip_bf16.h:1236
__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1517
__BF16_HOST_DEVICE_STATIC__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:1292
__BF16_HOST_DEVICE_STATIC__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered not equal.
Definition amd_hip_bf16.h:1228
__BF16_HOST_DEVICE_STATIC__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than.
Definition amd_hip_bf16.h:1196
__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1509
__BF16_HOST_DEVICE_STATIC__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than.
Definition amd_hip_bf16.h:1268
__BF16_HOST_DEVICE_STATIC__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than equal.
Definition amd_hip_bf16.h:1212
__BF16_HOST_DEVICE_STATIC__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than operator.
Definition amd_hip_bf16.h:1260
__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1533
__BF16_HOST_DEVICE_STATIC__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:1301
__BF16_HOST_DEVICE_STATIC__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered equal.
Definition amd_hip_bf16.h:1179
__BF16_HOST_DEVICE_STATIC__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:1334
__BF16_HOST_DEVICE_STATIC__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:1342
__BF16_HOST_DEVICE_STATIC__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:1358
__BF16_HOST_DEVICE_STATIC__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:1374
__BF16_HOST_DEVICE_STATIC__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:1366
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:1483
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1416
__BF16_HOST_DEVICE_STATIC__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1390
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1455
__BF16_HOST_DEVICE_STATIC__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:1382
__BF16_HOST_DEVICE_STATIC__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:1318
__BF16_HOST_DEVICE_STATIC__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:1310
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1445
__BF16_HOST_DEVICE_STATIC__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1398
__BF16_HOST_DEVICE_STATIC__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:1350
__BF16_HOST_DEVICE_STATIC__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:1326
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:1406
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1426
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:1436
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition amd_hip_bf16.h:869
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:912
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:895
__BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:886
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:877
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Converts a bfloat162 into negative.
Definition amd_hip_bf16.h:904
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:859
__BF16_HOST_DEVICE_STATIC__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:636
__BF16_HOST_DEVICE_STATIC__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:605
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:644
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:1618
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:1642
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:1626
__BF16_DEVICE_STATIC__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:1594
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:1666
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:1698
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:1650
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:1690
__BF16_DEVICE_STATIC__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:1634
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:1674
__BF16_DEVICE_STATIC__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:1706
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:1682
__BF16_DEVICE_STATIC__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:1602
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:1658
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:1610
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:1826
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:1786
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:1762
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:1730
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:1722
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:1810
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:1778
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:1714
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:1754
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:1738
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:1746
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:1770
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:1802
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:1794
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:1818
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Definition hip_fp16_math_fwd.h:57
Definition amd_hip_vector_types.h:2035
Definition hip_fp16_gcc.h:7
Definition hip_fp16_gcc.h:11