30#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS
32#if !defined(__HIPCC_RTC__)
33#include "amd_warp_functions.h"
34#include "hip_assert.h"
// Passes `val` through __builtin_amdgcn_readfirstlane and returns a T.
// The visible code treats the value as a 64-bit quantity `u.l` and applies
// the builtin separately to its low and high 32-bit halves.
// NOTE(review): the declaration of `u`, the store of `val` into it and the
// final return are not visible in this excerpt -- confirm against the full
// file.
39T __hip_readfirstlane(T val) {
// Cast each builtin result to unsigned before widening to unsigned long long
// so the widening zero-extends (presumably the builtin returns a signed
// int -- TODO confirm).
50 unsigned long long lower = (unsigned)__builtin_amdgcn_readfirstlane(u.l);
51 unsigned long long upper =
52 (unsigned)__builtin_amdgcn_readfirstlane(u.l >> 32);
// Reassemble the 64-bit value from the two halves.
53 u.l = (upper << 32) | lower;
// Internal helper macros used by the *_sync functions below. Only part of
// each macro body is visible in this excerpt (e.g. the declaration and update
// of `done` are not shown); the notes cover the visible lines only.
//
// __hip_adjust_mask_for_wave32(MASK)
//   When warpSize == 32, clears every bit of MASK above bit 31
//   (MASK &= 0xFFFFFFFF). MASK is modified in place, so it must be an lvalue.
//
// __hip_check_mask(MASK)
//   Asserts that MASK is non-zero, then loops while __any(!done). Inside the
//   loop, MASK is compared with the value returned by
//   __hip_readfirstlane(MASK); when they are equal, the macro asserts that
//   MASK == __ballot(true), i.e. that all threads specified in the mask are
//   executing the same operation with the same mask.
//
// __hip_do_sync(RETVAL, FUNC, MASK, ...)
//   Performs the same checks as __hip_check_mask and, in the branch where
//   MASK matches the chosen mask, additionally assigns
//   RETVAL = FUNC(__VA_ARGS__).
//
// NOTE(review): MASK is expanded several times and is not parenthesized, so
// callers should pass a plain variable rather than an arbitrary expression.
58#define __hip_adjust_mask_for_wave32(MASK) \
60 if (warpSize == 32) MASK &= 0xFFFFFFFF; \
88#define __hip_check_mask(MASK) \
90 __hip_assert(MASK && "mask must be non-zero"); \
92 while (__any(!done)) { \
94 auto chosen_mask = __hip_readfirstlane(MASK); \
95 if (MASK == chosen_mask) { \
96 __hip_assert(MASK == __ballot(true) && \
97 "all threads specified in the mask" \
98 " must execute the same operation with the same mask"); \
105#define __hip_do_sync(RETVAL, FUNC, MASK, ...) \
107 __hip_assert(MASK && "mask must be non-zero"); \
109 while (__any(!done)) { \
111 auto chosen_mask = __hip_readfirstlane(MASK); \
112 if (MASK == chosen_mask) { \
113 __hip_assert(MASK == __ballot(true) && \
114 "all threads specified in the mask" \
115 " must execute the same operation with the same mask"); \
116 RETVAL = FUNC(__VA_ARGS__); \
125template <
typename MaskT>
// Masked ballot. Trims `mask` for 32-wide warps, validates it with
// __hip_check_mask, and returns __ballot(predicate) restricted to the bits
// that are set in `mask`.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
127unsigned long long __ballot_sync(MaskT mask,
int predicate) {
129 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
130 "The mask must be a 64-bit integer. "
131 "Implicitly promoting a smaller integer is almost always an error.");
132 __hip_adjust_mask_for_wave32(mask);
133 __hip_check_mask(mask);
// Drop ballot bits for lanes that are not named in the mask.
134 return __ballot(predicate) & mask;
// Masked "all" vote. Returns non-zero when the result of
// __ballot_sync(mask, predicate) is exactly equal to `mask`, i.e. every bit
// set in the (trimmed) mask is also set in the ballot result.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
137template <
typename MaskT>
139int __all_sync(MaskT mask,
int predicate) {
141 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
142 "The mask must be a 64-bit integer. "
143 "Implicitly promoting a smaller integer is almost always an error.");
// __ballot_sync receives the mask by value and trims only its own copy, so
// the local mask is trimmed here as well to make the equality below
// compare like with like.
144 __hip_adjust_mask_for_wave32(mask);
145 return __ballot_sync(mask, predicate) == mask;
// Masked "any" vote. Returns non-zero when __ballot_sync(mask, predicate)
// has at least one bit set.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
148template <
typename MaskT>
150int __any_sync(MaskT mask,
int predicate) {
152 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
153 "The mask must be a 64-bit integer. "
154 "Implicitly promoting a smaller integer is almost always an error.");
// NOTE(review): __ballot_sync applies the same wave32 trim to its own copy of
// the mask, so this adjustment looks redundant (but harmless) here.
155 __hip_adjust_mask_for_wave32(mask);
156 return __ballot_sync(mask, predicate) != 0;
// Unmasked "match any". In the visible portion, the function loops while
// __any(!done); on each pass it obtains a candidate via
// __hip_readfirstlane(value) and, where the candidate equals this thread's
// own `value`, stores __activemask() into `retval`.
// NOTE(review): the declaration/update of `done`, the loop's closing lines
// and the return statement are not visible in this excerpt.
//
// T must be an integral or floating-point type of 4 or 8 bytes (see the
// condition and message below).
163unsigned long long __match_any(T value) {
165 (__hip_internal::is_integral<T>::value || __hip_internal::is_floating_point<T>::value) &&
166 (
sizeof(T) == 4 ||
sizeof(T) == 8),
167 "T can be int, unsigned int, long, unsigned long, long long, unsigned "
168 "long long, float or double.");
170 unsigned long long retval = 0;
172 while (__any(!done)) {
174 T chosen = __hip_readfirstlane(value);
// NOTE(review): floating-point T is permitted and a NaN never compares equal
// to itself with ==; confirm how a NaN `value` leaves this loop.
175 if (chosen == value) {
176 retval = __activemask();
// Masked "match any". Trims `mask` for 32-wide warps, validates it with
// __hip_check_mask, and returns the result of __match_any(value) restricted
// to the bits that are set in `mask`.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
185template <
typename MaskT,
typename T>
187unsigned long long __match_any_sync(MaskT mask, T value) {
189 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
190 "The mask must be a 64-bit integer. "
191 "Implicitly promoting a smaller integer is almost always an error.");
192 __hip_adjust_mask_for_wave32(mask);
193 __hip_check_mask(mask);
// Drop result bits for lanes that are not named in the mask.
194 return __match_any(value) & mask;
// Unmasked "match all". Obtains a reference value via
// __hip_readfirstlane(value) and, when __all(first == value) holds, returns
// __activemask().
// NOTE(review): how `pred` is written and what is returned when the values
// do not all match are not visible in this excerpt.
//
// T must be an integral or floating-point type of 4 or 8 bytes (see the
// condition and message below).
199unsigned long long __match_all(T value,
int* pred) {
201 (__hip_internal::is_integral<T>::value || __hip_internal::is_floating_point<T>::value) &&
202 (
sizeof(T) == 4 ||
sizeof(T) == 8),
203 "T can be int, unsigned int, long, unsigned long, long long, unsigned "
204 "long long, float or double.");
205 T first = __hip_readfirstlane(value);
206 if (__all(first == value)) {
208 return __activemask();
// Masked "match all". Trims `mask` for 32-wide warps and then uses
// __hip_do_sync, which validates the mask and assigns
// retval = __match_all(value, pred).
// NOTE(review): the declaration of `retval` and the return statement are not
// visible in this excerpt.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
215template <
typename MaskT,
typename T>
217unsigned long long __match_all_sync(MaskT mask, T value,
int* pred) {
219 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
220 "The mask must be a 64-bit integer. "
221 "Implicitly promoting a smaller integer is almost always an error.");
223 __hip_adjust_mask_for_wave32(mask);
224 __hip_do_sync(retval, __match_all, mask, value, pred);
// Masked shuffle. `mask` is only trimmed (for 32-wide warps) and validated
// with __hip_check_mask; the data movement itself is delegated to
// __shfl(var, srcLane, width), which does not receive the mask.
// `width` defaults to __AMDGCN_WAVEFRONT_SIZE.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
230template <
typename MaskT,
typename T>
232T __shfl_sync(MaskT mask, T var,
int srcLane,
233 int width = __AMDGCN_WAVEFRONT_SIZE) {
235 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
236 "The mask must be a 64-bit integer. "
237 "Implicitly promoting a smaller integer is almost always an error.");
238 __hip_adjust_mask_for_wave32(mask);
239 __hip_check_mask(mask);
240 return __shfl(var, srcLane, width);
// Masked shuffle-up. `mask` is only trimmed (for 32-wide warps) and validated
// with __hip_check_mask; the data movement itself is delegated to
// __shfl_up(var, delta, width), which does not receive the mask.
// `width` defaults to __AMDGCN_WAVEFRONT_SIZE.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
243template <
typename MaskT,
typename T>
245T __shfl_up_sync(MaskT mask, T var,
unsigned int delta,
246 int width = __AMDGCN_WAVEFRONT_SIZE) {
248 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
249 "The mask must be a 64-bit integer. "
250 "Implicitly promoting a smaller integer is almost always an error.");
251 __hip_adjust_mask_for_wave32(mask);
252 __hip_check_mask(mask);
253 return __shfl_up(var, delta, width);
// Masked shuffle-down. `mask` is only trimmed (for 32-wide warps) and
// validated with __hip_check_mask; the data movement itself is delegated to
// __shfl_down(var, delta, width), which does not receive the mask.
// `width` defaults to __AMDGCN_WAVEFRONT_SIZE.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
256template <
typename MaskT,
typename T>
258T __shfl_down_sync(MaskT mask, T var,
unsigned int delta,
259 int width = __AMDGCN_WAVEFRONT_SIZE) {
261 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
262 "The mask must be a 64-bit integer. "
263 "Implicitly promoting a smaller integer is almost always an error.");
264 __hip_adjust_mask_for_wave32(mask);
265 __hip_check_mask(mask);
266 return __shfl_down(var, delta, width);
// Masked shuffle-xor. `mask` is only trimmed (for 32-wide warps) and
// validated with __hip_check_mask; the data movement itself is delegated to
// __shfl_xor(var, laneMask, width), which does not receive the mask.
// `width` defaults to __AMDGCN_WAVEFRONT_SIZE.
//
// MaskT must be an integral type of exactly 8 bytes; the condition/message
// pair below enforces this (presumably the arguments of a static_assert whose
// opening line is not visible in this excerpt).
269template <
typename MaskT,
typename T>
271T __shfl_xor_sync(MaskT mask, T var,
int laneMask,
272 int width = __AMDGCN_WAVEFRONT_SIZE) {
274 __hip_internal::is_integral<MaskT>::value &&
sizeof(MaskT) == 8,
275 "The mask must be a 64-bit integer. "
276 "Implicitly promoting a smaller integer is almost always an error.");
277 __hip_adjust_mask_for_wave32(mask);
278 __hip_check_mask(mask);
279 return __shfl_xor(var, laneMask, width);
// Remove the internal helper macros once the functions that use them have
// been defined.
// NOTE(review): no #undef for __hip_do_sync is visible in this excerpt --
// confirm that it is undefined as well.
283#undef __hip_check_mask
284#undef __hip_adjust_mask_for_wave32