libstdc++
simd_builtin.h
1 // Simd Abi specific implementations -*- C++ -*-
2 
3 // Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <array>
31 #include <cmath>
32 #include <cstdlib>
33 
34 _GLIBCXX_SIMD_BEGIN_NAMESPACE
35 // _S_allbits{{{
36 template <typename _V>
37  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_allbits
38  = reinterpret_cast<_V>(~__vector_type_t<char, sizeof(_V) / sizeof(char)>());
39 
40 // }}}
41 // _S_signmask, _S_absmask{{{
42 template <typename _V, typename = _VectorTraits<_V>>
43  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_signmask
44  = __xor(_V() + 1, _V() - 1);
45 
46 template <typename _V, typename = _VectorTraits<_V>>
47  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_absmask
48  = __andnot(_S_signmask<_V>, _S_allbits<_V>);
49 
50 //}}}
51 // __vector_permute<Indices...>{{{
52 // Index == -1 requests zeroing of the output element
53 template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
54  _Tp
55  __vector_permute(_Tp __x)
56  {
57  static_assert(sizeof...(_Indices) == _TVT::_S_full_size);
58  return __make_vector<typename _TVT::value_type>(
59  (_Indices == -1 ? 0 : __x[_Indices == -1 ? 0 : _Indices])...);
60  }
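// Illustrative example (annotation, not part of the original header):
//   __vector_type_t<float, 4> __v = {1, 2, 3, 4};
//   __vector_permute<3, 2, 1, 0>(__v);   // {4, 3, 2, 1}
//   __vector_permute<-1, 0, -1, 1>(__v); // {0, 1, 0, 2}
// Each output element is read from __x at the given index; -1 yields 0.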
61 
62 // }}}
63 // __vector_shuffle<Indices...>{{{
64 // Index == -1 requests zeroing of the output element
65 template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
66  _Tp
67  __vector_shuffle(_Tp __x, _Tp __y)
68  {
69  return _Tp{(_Indices == -1 ? 0
70  : _Indices < _TVT::_S_full_size
71  ? __x[_Indices]
72  : __y[_Indices - _TVT::_S_full_size])...};
73  }
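// Illustrative example (annotation, not part of the original header):
// indices below _S_full_size select from __x, larger indices from __y:
//   __vector_type_t<int, 4> __a = {0, 1, 2, 3}, __b = {4, 5, 6, 7};
//   __vector_shuffle<0, 4, 1, 5>(__a, __b);  // {0, 4, 1, 5}
//   __vector_shuffle<-1, 7, 2, 6>(__a, __b); // {0, 7, 2, 6}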
74 
75 // }}}
76 // __make_wrapper{{{
77 template <typename _Tp, typename... _Args>
78  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, sizeof...(_Args)>
79  __make_wrapper(const _Args&... __args)
80  { return __make_vector<_Tp>(__args...); }
81 
82 // }}}
83 // __wrapper_bitcast{{{
84 template <typename _Tp, size_t _ToN = 0, typename _Up, size_t _M,
85  size_t _Np = _ToN != 0 ? _ToN : sizeof(_Up) * _M / sizeof(_Tp)>
86  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _Np>
87  __wrapper_bitcast(_SimdWrapper<_Up, _M> __x)
88  {
89  static_assert(_Np > 1);
90  return __intrin_bitcast<__vector_type_t<_Tp, _Np>>(__x._M_data);
91  }
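// Illustrative example (annotation, not part of the original header):
// reinterpret the 16 bytes of a wrapper of 4 ints as 8 shorts:
//   _SimdWrapper<int, 4> __w = ...;
//   _SimdWrapper<short, 8> __s = __wrapper_bitcast<short>(__w);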
92 
93 // }}}
94 // __shift_elements_right{{{
95 // if (__shift % 2ⁿ == 0) => the low n Bytes are correct
96 template <unsigned __shift, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
97  _GLIBCXX_SIMD_INTRINSIC _Tp
98  __shift_elements_right(_Tp __v)
99  {
100  [[maybe_unused]] const auto __iv = __to_intrin(__v);
101  static_assert(__shift <= sizeof(_Tp));
102  if constexpr (__shift == 0)
103  return __v;
104  else if constexpr (__shift == sizeof(_Tp))
105  return _Tp();
106 #if _GLIBCXX_SIMD_X86INTRIN // {{{
107  else if constexpr (__have_sse && __shift == 8
108  && _TVT::template _S_is<float, 4>)
109  return _mm_movehl_ps(__iv, __iv);
110  else if constexpr (__have_sse2 && __shift == 8
111  && _TVT::template _S_is<double, 2>)
112  return _mm_unpackhi_pd(__iv, __iv);
113  else if constexpr (__have_sse2 && sizeof(_Tp) == 16)
114  return reinterpret_cast<typename _TVT::type>(
115  _mm_srli_si128(reinterpret_cast<__m128i>(__iv), __shift));
116  else if constexpr (__shift == 16 && sizeof(_Tp) == 32)
117  {
118  /*if constexpr (__have_avx && _TVT::template _S_is<double, 4>)
119  return _mm256_permute2f128_pd(__iv, __iv, 0x81);
120  else if constexpr (__have_avx && _TVT::template _S_is<float, 8>)
121  return _mm256_permute2f128_ps(__iv, __iv, 0x81);
122  else if constexpr (__have_avx)
123  return reinterpret_cast<typename _TVT::type>(
124  _mm256_permute2f128_si256(__iv, __iv, 0x81));
125  else*/
126  return __zero_extend(__hi128(__v));
127  }
128  else if constexpr (__have_avx2 && sizeof(_Tp) == 32 && __shift < 16)
129  {
130  const auto __vll = __vector_bitcast<_LLong>(__v);
131  return reinterpret_cast<typename _TVT::type>(
132  _mm256_alignr_epi8(_mm256_permute2x128_si256(__vll, __vll, 0x81),
133  __vll, __shift));
134  }
135  else if constexpr (__have_avx && sizeof(_Tp) == 32 && __shift < 16)
136  {
137  const auto __vll = __vector_bitcast<_LLong>(__v);
138  return reinterpret_cast<typename _TVT::type>(
139  __concat(_mm_alignr_epi8(__hi128(__vll), __lo128(__vll), __shift),
140  _mm_srli_si128(__hi128(__vll), __shift)));
141  }
142  else if constexpr (sizeof(_Tp) == 32 && __shift > 16)
143  return __zero_extend(__shift_elements_right<__shift - 16>(__hi128(__v)));
144  else if constexpr (sizeof(_Tp) == 64 && __shift == 32)
145  return __zero_extend(__hi256(__v));
146  else if constexpr (__have_avx512f && sizeof(_Tp) == 64)
147  {
148  if constexpr (__shift >= 48)
149  return __zero_extend(
150  __shift_elements_right<__shift - 48>(__extract<3, 4>(__v)));
151  else if constexpr (__shift >= 32)
152  return __zero_extend(
153  __shift_elements_right<__shift - 32>(__hi256(__v)));
154  else if constexpr (__shift % 8 == 0)
155  return reinterpret_cast<typename _TVT::type>(
156  _mm512_alignr_epi64(__m512i(), __intrin_bitcast<__m512i>(__v),
157  __shift / 8));
158  else if constexpr (__shift % 4 == 0)
159  return reinterpret_cast<typename _TVT::type>(
160  _mm512_alignr_epi32(__m512i(), __intrin_bitcast<__m512i>(__v),
161  __shift / 4));
162  else if constexpr (__have_avx512bw && __shift < 16)
163  {
164  const auto __vll = __vector_bitcast<_LLong>(__v);
165  return reinterpret_cast<typename _TVT::type>(
166  _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __vll, 0xf9),
167  __vll, __shift));
168  }
169  else if constexpr (__have_avx512bw && __shift < 32)
170  {
171  const auto __vll = __vector_bitcast<_LLong>(__v);
172  return reinterpret_cast<typename _TVT::type>(
173  _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __m512i(), 0xee),
174  _mm512_shuffle_i32x4(__vll, __vll, 0xf9),
175  __shift - 16));
176  }
177  else
178  __assert_unreachable<_Tp>();
179  }
180  /*
181  } else if constexpr (__shift % 16 == 0 && sizeof(_Tp) == 64)
182  return __auto_bitcast(__extract<__shift / 16, 4>(__v));
183  */
184 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
185  else
186  {
187  constexpr int __chunksize = __shift % 8 == 0 ? 8
188  : __shift % 4 == 0 ? 4
189  : __shift % 2 == 0 ? 2
190  : 1;
191  auto __w = __vector_bitcast<__int_with_sizeof_t<__chunksize>>(__v);
192  using _Up = decltype(__w);
193  return __intrin_bitcast<_Tp>(
194  __call_with_n_evaluations<(sizeof(_Tp) - __shift) / __chunksize>(
195  [](auto... __chunks) { return _Up{__chunks...}; },
196  [&](auto __i) { return __w[__shift / __chunksize + __i]; }));
197  }
198  }
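// Illustrative example (annotation, not part of the original header):
// __shift counts bytes, not elements, so shifting 4 x int by 8 bytes
// drops the low two elements and shifts in zeros at the top:
//   __vector_type_t<int, 4> __v = {1, 2, 3, 4};
//   __shift_elements_right<8>(__v); // {3, 4, 0, 0}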
199 
200 // }}}
201 // __extract_part(_SimdWrapper<_Tp, _Np>) {{{
202 template <int _Index, int _Total, int _Combine, typename _Tp, size_t _Np>
203  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
204  _SimdWrapper<_Tp, _Np / _Total * _Combine>
205  __extract_part(const _SimdWrapper<_Tp, _Np> __x)
206  {
207  if constexpr (_Index % 2 == 0 && _Total % 2 == 0 && _Combine % 2 == 0)
208  return __extract_part<_Index / 2, _Total / 2, _Combine / 2>(__x);
209  else
210  {
211  constexpr size_t __values_per_part = _Np / _Total;
212  constexpr size_t __values_to_skip = _Index * __values_per_part;
213  constexpr size_t __return_size = __values_per_part * _Combine;
214  using _R = __vector_type_t<_Tp, __return_size>;
215  static_assert((_Index + _Combine) * __values_per_part * sizeof(_Tp)
216  <= sizeof(__x),
217  "out of bounds __extract_part");
218  // the following assertion would ensure that no "padding" is read
219  // static_assert(_Total >= _Index + _Combine, "_Total must be greater
220  // than _Index");
221 
222  // static_assert(__return_size * _Total == _Np, "_Np must be divisible
223  // by _Total");
224  if (__x._M_is_constprop())
225  return __generate_from_n_evaluations<__return_size, _R>(
226  [&](auto __i) { return __x[__values_to_skip + __i]; });
227  if constexpr (_Index == 0 && _Total == 1)
228  return __x;
229  else if constexpr (_Index == 0)
230  return __intrin_bitcast<_R>(__as_vector(__x));
231 #if _GLIBCXX_SIMD_X86INTRIN // {{{
232  else if constexpr (sizeof(__x) == 32
233  && __return_size * sizeof(_Tp) <= 16)
234  {
235  constexpr size_t __bytes_to_skip = __values_to_skip * sizeof(_Tp);
236  if constexpr (__bytes_to_skip == 16)
237  return __vector_bitcast<_Tp, __return_size>(
238  __hi128(__as_vector(__x)));
239  else
240  return __vector_bitcast<_Tp, __return_size>(
241  _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
242  __lo128(__vector_bitcast<_LLong>(__x)),
243  __bytes_to_skip));
244  }
245 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
246  else if constexpr (_Index > 0
247  && (__values_to_skip % __return_size != 0
248  || sizeof(_R) >= 8)
249  && (__values_to_skip + __return_size) * sizeof(_Tp)
250  <= 64
251  && sizeof(__x) >= 16)
252  return __intrin_bitcast<_R>(
253  __shift_elements_right<__values_to_skip * sizeof(_Tp)>(
254  __as_vector(__x)));
255  else
256  {
257  _R __r = {};
258  __builtin_memcpy(&__r,
259  reinterpret_cast<const char*>(&__x)
260  + sizeof(_Tp) * __values_to_skip,
261  __return_size * sizeof(_Tp));
262  return __r;
263  }
264  }
265  }
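// Illustrative example (annotation, not part of the original header):
// extract the second quarter (elements [4, 8)) of a 16-element wrapper:
//   _SimdWrapper<short, 16> __w = ...;
//   _SimdWrapper<short, 4> __part = __extract_part<1, 4, 1>(__w);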
266 
267 // }}}
268 // __extract_part(_SimdWrapper<bool, _Np>) {{{
269 template <int _Index, int _Total, int _Combine = 1, size_t _Np>
270  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _Np / _Total * _Combine>
271  __extract_part(const _SimdWrapper<bool, _Np> __x)
272  {
273  static_assert(_Combine == 1, "_Combine != 1 not implemented");
274  static_assert(__have_avx512f && _Np == _Np);
275  static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0);
276  return __x._M_data >> (_Index * _Np / _Total);
277  }
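// Illustrative example (annotation, not part of the original header; an
// AVX-512 bitmask ABI is assumed): extracting part 1 of 2 from an 8-bit
// mask shifts the high half down:
//   __extract_part<1, 2>(_SimdWrapper<bool, 8>(0b10110010)); // 0b1011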
278 
279 // }}}
280 
281 // __vector_convert {{{
282 // implementation requires an index sequence
283 template <typename _To, typename _From, size_t... _I>
284  _GLIBCXX_SIMD_INTRINSIC constexpr _To
285  __vector_convert(_From __a, index_sequence<_I...>)
286  {
287  using _Tp = typename _VectorTraits<_To>::value_type;
288  return _To{static_cast<_Tp>(__a[_I])...};
289  }
290 
291 template <typename _To, typename _From, size_t... _I>
292  _GLIBCXX_SIMD_INTRINSIC constexpr _To
293  __vector_convert(_From __a, _From __b, index_sequence<_I...>)
294  {
295  using _Tp = typename _VectorTraits<_To>::value_type;
296  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...};
297  }
298 
299 template <typename _To, typename _From, size_t... _I>
300  _GLIBCXX_SIMD_INTRINSIC constexpr _To
301  __vector_convert(_From __a, _From __b, _From __c, index_sequence<_I...>)
302  {
303  using _Tp = typename _VectorTraits<_To>::value_type;
304  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
305  static_cast<_Tp>(__c[_I])...};
306  }
307 
308 template <typename _To, typename _From, size_t... _I>
309  _GLIBCXX_SIMD_INTRINSIC constexpr _To
310  __vector_convert(_From __a, _From __b, _From __c, _From __d,
311  index_sequence<_I...>)
312  {
313  using _Tp = typename _VectorTraits<_To>::value_type;
314  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
315  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...};
316  }
317 
318 template <typename _To, typename _From, size_t... _I>
319  _GLIBCXX_SIMD_INTRINSIC constexpr _To
320  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
321  index_sequence<_I...>)
322  {
323  using _Tp = typename _VectorTraits<_To>::value_type;
324  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
325  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
326  static_cast<_Tp>(__e[_I])...};
327  }
328 
329 template <typename _To, typename _From, size_t... _I>
330  _GLIBCXX_SIMD_INTRINSIC constexpr _To
331  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
332  _From __f, index_sequence<_I...>)
333  {
334  using _Tp = typename _VectorTraits<_To>::value_type;
335  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
336  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
337  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...};
338  }
339 
340 template <typename _To, typename _From, size_t... _I>
341  _GLIBCXX_SIMD_INTRINSIC constexpr _To
342  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
343  _From __f, _From __g, index_sequence<_I...>)
344  {
345  using _Tp = typename _VectorTraits<_To>::value_type;
346  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
347  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
348  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
349  static_cast<_Tp>(__g[_I])...};
350  }
351 
352 template <typename _To, typename _From, size_t... _I>
353  _GLIBCXX_SIMD_INTRINSIC constexpr _To
354  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
355  _From __f, _From __g, _From __h, index_sequence<_I...>)
356  {
357  using _Tp = typename _VectorTraits<_To>::value_type;
358  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
359  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
360  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
361  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...};
362  }
363 
364 template <typename _To, typename _From, size_t... _I>
365  _GLIBCXX_SIMD_INTRINSIC constexpr _To
366  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
367  _From __f, _From __g, _From __h, _From __i,
368  index_sequence<_I...>)
369  {
370  using _Tp = typename _VectorTraits<_To>::value_type;
371  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
372  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
373  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
374  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
375  static_cast<_Tp>(__i[_I])...};
376  }
377 
378 template <typename _To, typename _From, size_t... _I>
379  _GLIBCXX_SIMD_INTRINSIC constexpr _To
380  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
381  _From __f, _From __g, _From __h, _From __i, _From __j,
382  index_sequence<_I...>)
383  {
384  using _Tp = typename _VectorTraits<_To>::value_type;
385  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
386  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
387  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
388  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
389  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...};
390  }
391 
392 template <typename _To, typename _From, size_t... _I>
393  _GLIBCXX_SIMD_INTRINSIC constexpr _To
394  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
395  _From __f, _From __g, _From __h, _From __i, _From __j,
396  _From __k, index_sequence<_I...>)
397  {
398  using _Tp = typename _VectorTraits<_To>::value_type;
399  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
400  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
401  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
402  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
403  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
404  static_cast<_Tp>(__k[_I])...};
405  }
406 
407 template <typename _To, typename _From, size_t... _I>
408  _GLIBCXX_SIMD_INTRINSIC constexpr _To
409  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
410  _From __f, _From __g, _From __h, _From __i, _From __j,
411  _From __k, _From __l, index_sequence<_I...>)
412  {
413  using _Tp = typename _VectorTraits<_To>::value_type;
414  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
415  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
416  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
417  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
418  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
419  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...};
420  }
421 
422 template <typename _To, typename _From, size_t... _I>
423  _GLIBCXX_SIMD_INTRINSIC constexpr _To
424  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
425  _From __f, _From __g, _From __h, _From __i, _From __j,
426  _From __k, _From __l, _From __m, index_sequence<_I...>)
427  {
428  using _Tp = typename _VectorTraits<_To>::value_type;
429  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
430  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
431  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
432  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
433  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
434  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
435  static_cast<_Tp>(__m[_I])...};
436  }
437 
438 template <typename _To, typename _From, size_t... _I>
439  _GLIBCXX_SIMD_INTRINSIC constexpr _To
440  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
441  _From __f, _From __g, _From __h, _From __i, _From __j,
442  _From __k, _From __l, _From __m, _From __n,
443  index_sequence<_I...>)
444  {
445  using _Tp = typename _VectorTraits<_To>::value_type;
446  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
447  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
448  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
449  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
450  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
451  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
452  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...};
453  }
454 
455 template <typename _To, typename _From, size_t... _I>
456  _GLIBCXX_SIMD_INTRINSIC constexpr _To
457  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
458  _From __f, _From __g, _From __h, _From __i, _From __j,
459  _From __k, _From __l, _From __m, _From __n, _From __o,
460  index_sequence<_I...>)
461  {
462  using _Tp = typename _VectorTraits<_To>::value_type;
463  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
464  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
465  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
466  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
467  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
468  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
469  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...,
470  static_cast<_Tp>(__o[_I])...};
471  }
472 
473 template <typename _To, typename _From, size_t... _I>
474  _GLIBCXX_SIMD_INTRINSIC constexpr _To
475  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
476  _From __f, _From __g, _From __h, _From __i, _From __j,
477  _From __k, _From __l, _From __m, _From __n, _From __o,
478  _From __p, index_sequence<_I...>)
479  {
480  using _Tp = typename _VectorTraits<_To>::value_type;
481  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
482  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
483  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
484  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
485  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
486  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
487  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...,
488  static_cast<_Tp>(__o[_I])..., static_cast<_Tp>(__p[_I])...};
489  }
490 
491 // Defer actual conversion to the overload that takes an index sequence. Note
492 // that this function adds zeros or drops values off the end if you don't ensure
493 // matching width.
494 template <typename _To, typename... _From, size_t _FromSize>
495  _GLIBCXX_SIMD_INTRINSIC constexpr _To
496  __vector_convert(_SimdWrapper<_From, _FromSize>... __xs)
497  {
498 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
499  using _From0 = __first_of_pack_t<_From...>;
500  using _FW = _SimdWrapper<_From0, _FromSize>;
501  if (!_FW::_S_is_partial && !(... && __xs._M_is_constprop()))
502  {
503  if constexpr ((sizeof...(_From) & (sizeof...(_From) - 1))
504  == 0) // power-of-two number of arguments
505  return __convert_x86<_To>(__as_vector(__xs)...);
506  else // append zeros and recurse until the above branch is taken
507  return __vector_convert<_To>(__xs..., _FW{});
508  }
509  else
510 #endif
511  return __vector_convert<_To>(
512  __as_vector(__xs)...,
513  make_index_sequence<(sizeof...(__xs) == 1 ? std::min(
514  _VectorTraits<_To>::_S_full_size, int(_FromSize))
515  : _FromSize)>());
516  }
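// Illustrative example (annotation, not part of the original header):
// converting two wrappers of 4 ints into one 8-element short vector
// concatenates the converted elements in argument order:
//   _SimdWrapper<int, 4> __a = ..., __b = ...;
//   auto __r = __vector_convert<__vector_type_t<short, 8>>(__a, __b);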
517 
518 // }}}
519 // __convert function{{{
520 template <typename _To, typename _From, typename... _More>
521  _GLIBCXX_SIMD_INTRINSIC constexpr auto
522  __convert(_From __v0, _More... __vs)
523  {
524  static_assert((true && ... && is_same_v<_From, _More>) );
525  if constexpr (__is_vectorizable_v<_From>)
526  {
527  using _V = typename _VectorTraits<_To>::type;
528  using _Tp = typename _VectorTraits<_To>::value_type;
529  return _V{static_cast<_Tp>(__v0), static_cast<_Tp>(__vs)...};
530  }
531  else if constexpr (__is_vector_type_v<_From>)
532  return __convert<_To>(__as_wrapper(__v0), __as_wrapper(__vs)...);
533  else // _SimdWrapper arguments
534  {
535  constexpr size_t __input_size = _From::_S_size * (1 + sizeof...(_More));
536  if constexpr (__is_vectorizable_v<_To>)
537  return __convert<__vector_type_t<_To, __input_size>>(__v0, __vs...);
538  else if constexpr (!__is_vector_type_v<_To>)
539  return _To(__convert<typename _To::_BuiltinType>(__v0, __vs...));
540  else
541  {
542  static_assert(
543  sizeof...(_More) == 0
544  || _VectorTraits<_To>::_S_full_size >= __input_size,
545  "__convert(...) requires the input to fit into the output");
546  return __vector_convert<_To>(__v0, __vs...);
547  }
548  }
549  }
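// Illustrative example (annotation, not part of the original header):
// the inputs must fit into the requested destination type:
//   _SimdWrapper<int, 4> __lo = ..., __hi = ...;
//   auto __f = __convert<_SimdWrapper<float, 8>>(__lo, __hi); // 8 floats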
550 
551 // }}}
552 // __convert_all{{{
553 // Converts __v into array<_To, N>, where N is _NParts if non-zero or
554 // otherwise deduced from _To such that N * #elements(_To) <= #elements(__v).
555 // Note: this function may return fewer than all converted elements
556 template <typename _To,
557  size_t _NParts = 0, // allows converting fewer or more than all
558  // elements (only the last _To may be partially filled)
559  size_t _Offset = 0, // where to start, # of elements (not Bytes or
560  // Parts)
561  typename _From, typename _FromVT = _VectorTraits<_From>>
562  _GLIBCXX_SIMD_INTRINSIC auto
563  __convert_all(_From __v)
564  {
565  if constexpr (is_arithmetic_v<_To> && _NParts != 1)
566  {
567  static_assert(_Offset < _FromVT::_S_full_size);
568  constexpr auto _Np
569  = _NParts == 0 ? _FromVT::_S_partial_width - _Offset : _NParts;
570  return __generate_from_n_evaluations<_Np, array<_To, _Np>>(
571  [&](auto __i) { return static_cast<_To>(__v[__i + _Offset]); });
572  }
573  else
574  {
575  static_assert(__is_vector_type_v<_To>);
576  using _ToVT = _VectorTraits<_To>;
577  if constexpr (__is_vector_type_v<_From>)
578  return __convert_all<_To, _NParts>(__as_wrapper(__v));
579  else if constexpr (_NParts == 1)
580  {
581  static_assert(_Offset % _ToVT::_S_full_size == 0);
582  return array<_To, 1>{__vector_convert<_To>(
583  __extract_part<_Offset / _ToVT::_S_full_size,
584  __div_roundup(_FromVT::_S_partial_width,
585  _ToVT::_S_full_size)>(__v))};
586  }
587 #if _GLIBCXX_SIMD_X86INTRIN // {{{
588  else if constexpr (!__have_sse4_1 && _Offset == 0
589  && is_integral_v<typename _FromVT::value_type>
590  && sizeof(typename _FromVT::value_type)
591  < sizeof(typename _ToVT::value_type)
592  && !(sizeof(typename _FromVT::value_type) == 4
593  && is_same_v<typename _ToVT::value_type, double>))
594  {
595  using _ToT = typename _ToVT::value_type;
596  using _FromT = typename _FromVT::value_type;
597  constexpr size_t _Np
598  = _NParts != 0
599  ? _NParts
600  : (_FromVT::_S_partial_width / _ToVT::_S_full_size);
601  using _R = array<_To, _Np>;
602  // __adjust reinterprets its input to have the requested number of
603  // entries (pass a _SizeConstant) so that no unnecessary intermediate
604  // conversions are requested and, more importantly, no intermediate
605  // conversions are missing
606  [[maybe_unused]] auto __adjust
607  = [](auto __n,
608  auto __vv) -> _SimdWrapper<_FromT, decltype(__n)::value> {
609  return __vector_bitcast<_FromT, decltype(__n)::value>(__vv);
610  };
611  [[maybe_unused]] const auto __vi = __to_intrin(__v);
612  auto&& __make_array = [](auto __x0, [[maybe_unused]] auto __x1) {
613  if constexpr (_Np == 1)
614  return _R{__intrin_bitcast<_To>(__x0)};
615  else
616  return _R{__intrin_bitcast<_To>(__x0),
617  __intrin_bitcast<_To>(__x1)};
618  };
619 
620  if constexpr (_Np == 0)
621  return _R{};
622  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 2)
623  {
624  static_assert(is_integral_v<_FromT>);
625  static_assert(is_integral_v<_ToT>);
626  if constexpr (is_unsigned_v<_FromT>)
627  return __make_array(_mm_unpacklo_epi8(__vi, __m128i()),
628  _mm_unpackhi_epi8(__vi, __m128i()));
629  else
630  return __make_array(
631  _mm_srai_epi16(_mm_unpacklo_epi8(__vi, __vi), 8),
632  _mm_srai_epi16(_mm_unpackhi_epi8(__vi, __vi), 8));
633  }
634  else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 4)
635  {
636  static_assert(is_integral_v<_FromT>);
637  if constexpr (is_floating_point_v<_ToT>)
638  {
639  const auto __ints
640  = __convert_all<__vector_type16_t<int>, _Np>(
641  __adjust(_SizeConstant<_Np * 4>(), __v));
642  return __generate_from_n_evaluations<_Np, _R>(
643  [&](auto __i) {
644  return __vector_convert<_To>(__as_wrapper(__ints[__i]));
645  });
646  }
647  else if constexpr (is_unsigned_v<_FromT>)
648  return __make_array(_mm_unpacklo_epi16(__vi, __m128i()),
649  _mm_unpackhi_epi16(__vi, __m128i()));
650  else
651  return __make_array(
652  _mm_srai_epi32(_mm_unpacklo_epi16(__vi, __vi), 16),
653  _mm_srai_epi32(_mm_unpackhi_epi16(__vi, __vi), 16));
654  }
655  else if constexpr (sizeof(_FromT) == 4 && sizeof(_ToT) == 8
656  && is_integral_v<_FromT> && is_integral_v<_ToT>)
657  {
658  if constexpr (is_unsigned_v<_FromT>)
659  return __make_array(_mm_unpacklo_epi32(__vi, __m128i()),
660  _mm_unpackhi_epi32(__vi, __m128i()));
661  else
662  return __make_array(
663  _mm_unpacklo_epi32(__vi, _mm_srai_epi32(__vi, 31)),
664  _mm_unpackhi_epi32(__vi, _mm_srai_epi32(__vi, 31)));
665  }
666  else if constexpr (sizeof(_FromT) == 4 && sizeof(_ToT) == 8
667  && is_integral_v<_FromT> && is_integral_v<_ToT>)
668  {
669  if constexpr (is_unsigned_v<_FromT>)
670  return __make_array(_mm_unpacklo_epi32(__vi, __m128i()),
671  _mm_unpackhi_epi32(__vi, __m128i()));
672  else
673  return __make_array(
674  _mm_unpacklo_epi32(__vi, _mm_srai_epi32(__vi, 31)),
675  _mm_unpackhi_epi32(__vi, _mm_srai_epi32(__vi, 31)));
676  }
677  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) >= 4
678  && is_signed_v<_FromT>)
679  {
680  const __m128i __vv[2] = {_mm_unpacklo_epi8(__vi, __vi),
681  _mm_unpackhi_epi8(__vi, __vi)};
682  const __vector_type_t<int, 4> __vvvv[4] = {
683  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[0], __vv[0])),
684  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[0], __vv[0])),
685  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[1], __vv[1])),
686  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[1], __vv[1]))};
687  if constexpr (sizeof(_ToT) == 4)
688  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
689  return __vector_convert<_To>(
690  _SimdWrapper<int, 4>(__vvvv[__i] >> 24));
691  });
692  else if constexpr (is_integral_v<_ToT>)
693  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
694  const auto __signbits = __to_intrin(__vvvv[__i / 2] >> 31);
695  const auto __sx32 = __to_intrin(__vvvv[__i / 2] >> 24);
696  return __vector_bitcast<_ToT>(
697  __i % 2 == 0 ? _mm_unpacklo_epi32(__sx32, __signbits)
698  : _mm_unpackhi_epi32(__sx32, __signbits));
699  });
700  else
701  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
702  const _SimdWrapper<int, 4> __int4 = __vvvv[__i / 2] >> 24;
703  return __vector_convert<_To>(
704  __i % 2 == 0 ? __int4
705  : _SimdWrapper<int, 4>(
706  _mm_unpackhi_epi64(__to_intrin(__int4),
707  __to_intrin(__int4))));
708  });
709  }
710  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 4)
711  {
712  const auto __shorts = __convert_all<__vector_type16_t<
713  conditional_t<is_signed_v<_FromT>, short, unsigned short>>>(
714  __adjust(_SizeConstant<(_Np + 1) / 2 * 8>(), __v));
715  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
716  return __convert_all<_To>(__shorts[__i / 2])[__i % 2];
717  });
718  }
719  else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 8
720  && is_signed_v<_FromT> && is_integral_v<_ToT>)
721  {
722  const __m128i __vv[2] = {_mm_unpacklo_epi16(__vi, __vi),
723  _mm_unpackhi_epi16(__vi, __vi)};
724  const __vector_type16_t<int> __vvvv[4]
725  = {__vector_bitcast<int>(
726  _mm_unpacklo_epi32(_mm_srai_epi32(__vv[0], 16),
727  _mm_srai_epi32(__vv[0], 31))),
728  __vector_bitcast<int>(
729  _mm_unpackhi_epi32(_mm_srai_epi32(__vv[0], 16),
730  _mm_srai_epi32(__vv[0], 31))),
731  __vector_bitcast<int>(
732  _mm_unpacklo_epi32(_mm_srai_epi32(__vv[1], 16),
733  _mm_srai_epi32(__vv[1], 31))),
734  __vector_bitcast<int>(
735  _mm_unpackhi_epi32(_mm_srai_epi32(__vv[1], 16),
736  _mm_srai_epi32(__vv[1], 31)))};
737  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
738  return __vector_bitcast<_ToT>(__vvvv[__i]);
739  });
740  }
741  else if constexpr (sizeof(_FromT) <= 2 && sizeof(_ToT) == 8)
742  {
743  const auto __ints
744  = __convert_all<__vector_type16_t<conditional_t<
745  is_signed_v<_FromT> || is_floating_point_v<_ToT>, int,
746  unsigned int>>>(
747  __adjust(_SizeConstant<(_Np + 1) / 2 * 4>(), __v));
748  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
749  return __convert_all<_To>(__ints[__i / 2])[__i % 2];
750  });
751  }
752  else
753  __assert_unreachable<_To>();
754  }
755 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
756  else if constexpr ((_FromVT::_S_partial_width - _Offset)
757  > _ToVT::_S_full_size)
758  {
759  /*
760  static_assert(
761  (_FromVT::_S_partial_width & (_FromVT::_S_partial_width - 1)) ==
762  0,
763  "__convert_all only supports power-of-2 number of elements.
764  Otherwise " "the return type cannot be array<_To, N>.");
765  */
766  constexpr size_t _NTotal
767  = (_FromVT::_S_partial_width - _Offset) / _ToVT::_S_full_size;
768  constexpr size_t _Np = _NParts == 0 ? _NTotal : _NParts;
769  static_assert(
770  _Np <= _NTotal
771  || (_Np == _NTotal + 1
772  && (_FromVT::_S_partial_width - _Offset) % _ToVT::_S_full_size
773  > 0));
774  using _R = array<_To, _Np>;
775  if constexpr (_Np == 1)
776  return _R{__vector_convert<_To>(
777  __extract_part<_Offset, _FromVT::_S_partial_width,
778  _ToVT::_S_full_size>(__v))};
779  else
780  return __generate_from_n_evaluations<_Np, _R>([&](
781  auto __i) constexpr {
782  auto __part
783  = __extract_part<__i * _ToVT::_S_full_size + _Offset,
784  _FromVT::_S_partial_width,
785  _ToVT::_S_full_size>(__v);
786  return __vector_convert<_To>(__part);
787  });
788  }
789  else if constexpr (_Offset == 0)
790  return array<_To, 1>{__vector_convert<_To>(__v)};
791  else
792  return array<_To, 1>{__vector_convert<_To>(
793  __extract_part<_Offset, _FromVT::_S_partial_width,
794  _FromVT::_S_partial_width - _Offset>(__v))};
795  }
796  }
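// Illustrative example (annotation, not part of the original header):
// widening a 16 x char vector produces four 4 x int vectors:
//   __vector_type_t<char, 16> __v = ...;
//   array<__vector_type16_t<int>, 4> __parts
//     = __convert_all<__vector_type16_t<int>>(__v);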
797 
798 // }}}
799 
800 // _GnuTraits {{{
801 template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
802  struct _GnuTraits
803  {
804  using _IsValid = true_type;
805  using _SimdImpl = typename _Abi::_SimdImpl;
806  using _MaskImpl = typename _Abi::_MaskImpl;
807 
808  // simd and simd_mask member types {{{
809  using _SimdMember = _SimdWrapper<_Tp, _Np>;
810  using _MaskMember = _SimdWrapper<_Mp, _Np>;
811  static constexpr size_t _S_simd_align = alignof(_SimdMember);
812  static constexpr size_t _S_mask_align = alignof(_MaskMember);
813 
814  // }}}
815  // size metadata {{{
816  static constexpr size_t _S_full_size = _SimdMember::_S_full_size;
817  static constexpr bool _S_is_partial = _SimdMember::_S_is_partial;
818 
819  // }}}
820  // _SimdBase / base class for simd, providing extra conversions {{{
821  struct _SimdBase2
822  {
823  explicit operator __intrinsic_type_t<_Tp, _Np>() const
824  {
825  return __to_intrin(static_cast<const simd<_Tp, _Abi>*>(this)->_M_data);
826  }
827  explicit operator __vector_type_t<_Tp, _Np>() const
828  {
829  return static_cast<const simd<_Tp, _Abi>*>(this)->_M_data.__builtin();
830  }
831  };
832 
833  struct _SimdBase1
834  {
835  explicit operator __intrinsic_type_t<_Tp, _Np>() const
836  { return __data(*static_cast<const simd<_Tp, _Abi>*>(this)); }
837  };
838 
839  using _SimdBase = conditional_t<
840  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
841  _SimdBase1, _SimdBase2>;
842 
843  // }}}
844  // _MaskBase {{{
845  struct _MaskBase2
846  {
847  explicit operator __intrinsic_type_t<_Tp, _Np>() const
848  {
849  return static_cast<const simd_mask<_Tp, _Abi>*>(this)
850  ->_M_data.__intrin();
851  }
852  explicit operator __vector_type_t<_Tp, _Np>() const
853  {
854  return static_cast<const simd_mask<_Tp, _Abi>*>(this)->_M_data._M_data;
855  }
856  };
857 
858  struct _MaskBase1
859  {
860  explicit operator __intrinsic_type_t<_Tp, _Np>() const
861  { return __data(*static_cast<const simd_mask<_Tp, _Abi>*>(this)); }
862  };
863 
864  using _MaskBase = conditional_t<
865  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
866  _MaskBase1, _MaskBase2>;
867 
868  // }}}
869  // _MaskCastType {{{
870  // parameter type of one explicit simd_mask constructor
871  class _MaskCastType
872  {
873  using _Up = __intrinsic_type_t<_Tp, _Np>;
874  _Up _M_data;
875 
876  public:
877  _MaskCastType(_Up __x) : _M_data(__x) {}
878  operator _MaskMember() const { return _M_data; }
879  };
880 
881  // }}}
882  // _SimdCastType {{{
883  // parameter type of one explicit simd constructor
884  class _SimdCastType1
885  {
886  using _Ap = __intrinsic_type_t<_Tp, _Np>;
887  _SimdMember _M_data;
888 
889  public:
890  _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
891  operator _SimdMember() const { return _M_data; }
892  };
893 
894  class _SimdCastType2
895  {
896  using _Ap = __intrinsic_type_t<_Tp, _Np>;
897  using _Bp = __vector_type_t<_Tp, _Np>;
898  _SimdMember _M_data;
899 
900  public:
901  _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
902  _SimdCastType2(_Bp __b) : _M_data(__b) {}
903  operator _SimdMember() const { return _M_data; }
904  };
905 
906  using _SimdCastType = conditional_t<
907  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
908  _SimdCastType1, _SimdCastType2>;
909  //}}}
910  };
911 
912 // }}}
913 struct _CommonImplX86;
914 struct _CommonImplNeon;
915 struct _CommonImplBuiltin;
916 template <typename _Abi> struct _SimdImplBuiltin;
917 template <typename _Abi> struct _MaskImplBuiltin;
918 template <typename _Abi> struct _SimdImplX86;
919 template <typename _Abi> struct _MaskImplX86;
920 template <typename _Abi> struct _SimdImplNeon;
921 template <typename _Abi> struct _MaskImplNeon;
922 template <typename _Abi> struct _SimdImplPpc;
923 template <typename _Abi> struct _MaskImplPpc;
924 
925 // simd_abi::_VecBuiltin {{{
926 template <int _UsedBytes>
927  struct simd_abi::_VecBuiltin
928  {
929  template <typename _Tp>
930  static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);
931 
932  // validity traits {{{
933  struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {};
934 
935  template <typename _Tp>
936  struct _IsValidSizeFor
937  : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1
938  && _UsedBytes % sizeof(_Tp) == 0
939  && _UsedBytes <= __vectorized_sizeof<_Tp>()
940  && (!__have_avx512f || _UsedBytes <= 32))> {};
941 
942  template <typename _Tp>
943  struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>,
944  _IsValidSizeFor<_Tp>> {};
945 
946  template <typename _Tp>
947  static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
948 
949  // }}}
950  // _SimdImpl/_MaskImpl {{{
951 #if _GLIBCXX_SIMD_X86INTRIN
952  using _CommonImpl = _CommonImplX86;
953  using _SimdImpl = _SimdImplX86<_VecBuiltin<_UsedBytes>>;
954  using _MaskImpl = _MaskImplX86<_VecBuiltin<_UsedBytes>>;
955 #elif _GLIBCXX_SIMD_HAVE_NEON
956  using _CommonImpl = _CommonImplNeon;
957  using _SimdImpl = _SimdImplNeon<_VecBuiltin<_UsedBytes>>;
958  using _MaskImpl = _MaskImplNeon<_VecBuiltin<_UsedBytes>>;
959 #else
960  using _CommonImpl = _CommonImplBuiltin;
961 #ifdef __ALTIVEC__
962  using _SimdImpl = _SimdImplPpc<_VecBuiltin<_UsedBytes>>;
963  using _MaskImpl = _MaskImplPpc<_VecBuiltin<_UsedBytes>>;
964 #else
965  using _SimdImpl = _SimdImplBuiltin<_VecBuiltin<_UsedBytes>>;
966  using _MaskImpl = _MaskImplBuiltin<_VecBuiltin<_UsedBytes>>;
967 #endif
968 #endif
969 
970  // }}}
971  // __traits {{{
972  template <typename _Tp>
973  using _MaskValueType = __int_for_sizeof_t<_Tp>;
974 
975  template <typename _Tp>
976  using __traits
977  = conditional_t<_S_is_valid_v<_Tp>,
978  _GnuTraits<_Tp, _MaskValueType<_Tp>,
979  _VecBuiltin<_UsedBytes>, _S_size<_Tp>>,
980  _InvalidTraits>;
981 
982  //}}}
983  // size metadata {{{
984  template <typename _Tp>
985  static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;
986 
987  template <typename _Tp>
988  static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;
989 
990  // }}}
991  // implicit masks {{{
992  template <typename _Tp>
993  using _MaskMember = _SimdWrapper<_MaskValueType<_Tp>, _S_size<_Tp>>;
994 
995  template <typename _Tp>
996  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
997  _S_implicit_mask()
998  {
999  using _UV = typename _MaskMember<_Tp>::_BuiltinType;
1000  if constexpr (!_MaskMember<_Tp>::_S_is_partial)
1001  return ~_UV();
1002  else
1003  {
1004  constexpr auto __size = _S_size<_Tp>;
1005  _GLIBCXX_SIMD_USE_CONSTEXPR auto __r = __generate_vector<_UV>(
1006  [](auto __i) constexpr { return __i < __size ? -1 : 0; });
1007  return __r;
1008  }
1009  }
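// Illustrative example (annotation, not part of the original header):
// for a partial ABI such as _VecBuiltin<12> with int (3 of 4 elements
// used), the implicit mask covers only the used elements:
//   _S_implicit_mask<int>(); // {-1, -1, -1, 0}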
1010 
1011  template <typename _Tp>
1012  _GLIBCXX_SIMD_INTRINSIC static constexpr __intrinsic_type_t<_Tp,
1013  _S_size<_Tp>>
1014  _S_implicit_mask_intrin()
1015  {
1016  return __to_intrin(
1017  __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()._M_data));
1018  }
1019 
1020  template <typename _TW, typename _TVT = _VectorTraits<_TW>>
1021  _GLIBCXX_SIMD_INTRINSIC static constexpr _TW _S_masked(_TW __x)
1022  {
1023  using _Tp = typename _TVT::value_type;
1024  if constexpr (!_MaskMember<_Tp>::_S_is_partial)
1025  return __x;
1026  else
1027  return __and(__as_vector(__x),
1028  __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()));
1029  }
1030 
1031  template <typename _TW, typename _TVT = _VectorTraits<_TW>>
1032  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1033  __make_padding_nonzero(_TW __x)
1034  {
1035  using _Tp = typename _TVT::value_type;
1036  if constexpr (!_S_is_partial<_Tp>)
1037  return __x;
1038  else
1039  {
1040  _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask
1041  = __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>());
1042  if constexpr (is_integral_v<_Tp>)
1043  return __or(__x, ~__implicit_mask);
1044  else
1045  {
1046  _GLIBCXX_SIMD_USE_CONSTEXPR auto __one
1047  = __andnot(__implicit_mask,
1048  __vector_broadcast<_S_full_size<_Tp>>(_Tp(1)));
1049  // it's not enough to return `x | 1_in_padding` because the
1050  // padding in x might be inf or nan (independent of
1051  // __FINITE_MATH_ONLY__, because it's about padding bits)
1052  return __or(__and(__x, __implicit_mask), __one);
1053  }
1054  }
1055  }
1056  // }}}
1057  };
1058 
1059 // }}}
1060 // simd_abi::_VecBltnBtmsk {{{
1061 template <int _UsedBytes>
1062  struct simd_abi::_VecBltnBtmsk
1063  {
1064  template <typename _Tp>
1065  static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);
1066 
1067  // validity traits {{{
1068  struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {};
1069 
1070  template <typename _Tp>
1071  struct _IsValidSizeFor
1072  : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1
1073  && _UsedBytes % sizeof(_Tp) == 0 && _UsedBytes <= 64
1074  && (_UsedBytes > 32 || __have_avx512vl))> {};
1075 
1076  // Bitmasks require at least AVX512F. If sizeof(_Tp) < 4, AVX512BW is also
1077  // required.
1078  template <typename _Tp>
1079  struct _IsValid
1080  : conjunction<
1081  _IsValidAbiTag, __bool_constant<__have_avx512f>,
1082  __bool_constant<__have_avx512bw || (sizeof(_Tp) >= 4)>,
1083  __bool_constant<(__vectorized_sizeof<_Tp>() > sizeof(_Tp))>,
1084  _IsValidSizeFor<_Tp>> {};
1085 
1086  template <typename _Tp>
1087  static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
1088 
1089  // }}}
1090  // simd/_MaskImpl {{{
1091  #if _GLIBCXX_SIMD_X86INTRIN
1092  using _CommonImpl = _CommonImplX86;
1093  using _SimdImpl = _SimdImplX86<_VecBltnBtmsk<_UsedBytes>>;
1094  using _MaskImpl = _MaskImplX86<_VecBltnBtmsk<_UsedBytes>>;
1095  #else
1096  template <int>
1097  struct _MissingImpl;
1098 
1099  using _CommonImpl = _MissingImpl<_UsedBytes>;
1100  using _SimdImpl = _MissingImpl<_UsedBytes>;
1101  using _MaskImpl = _MissingImpl<_UsedBytes>;
1102  #endif
1103 
1104  // }}}
1105  // __traits {{{
1106  template <typename _Tp>
1107  using _MaskMember = _SimdWrapper<bool, _S_size<_Tp>>;
1108 
1109  template <typename _Tp>
1110  using __traits = conditional_t<
1111  _S_is_valid_v<_Tp>,
1112  _GnuTraits<_Tp, bool, _VecBltnBtmsk<_UsedBytes>, _S_size<_Tp>>,
1113  _InvalidTraits>;
1114 
1115  //}}}
1116  // size metadata {{{
1117  template <typename _Tp>
1118  static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;
1119  template <typename _Tp>
1120  static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;
1121 
1122  // }}}
1123  // implicit mask {{{
1124  private:
1125  template <typename _Tp>
1126  using _ImplicitMask = _SimdWrapper<bool, _S_size<_Tp>>;
1127 
1128  public:
1129  template <size_t _Np>
1130  _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<_Np>
1131  __implicit_mask_n()
1132  {
1133  using _Tp = __bool_storage_member_type_t<_Np>;
1134  return _Np < sizeof(_Tp) * __CHAR_BIT__ ? _Tp((1ULL << _Np) - 1) : ~_Tp();
1135  }
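// Illustrative example (annotation, not part of the original header):
//   __implicit_mask_n<5>(); // 0b00011111 in the 8-bit bool storage type
//   __implicit_mask_n<8>(); // ~0, i.e. 0xff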
1136 
1137  template <typename _Tp>
1138  _GLIBCXX_SIMD_INTRINSIC static constexpr _ImplicitMask<_Tp>
1139  _S_implicit_mask()
1140  { return __implicit_mask_n<_S_size<_Tp>>(); }
1141 
1142  template <typename _Tp>
1143  _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<
1144  _S_size<_Tp>>
1145  _S_implicit_mask_intrin()
1146  { return __implicit_mask_n<_S_size<_Tp>>(); }
1147 
1148  template <typename _Tp, size_t _Np>
1149  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1150  _S_masked(_SimdWrapper<_Tp, _Np> __x)
1151  {
1152  if constexpr (is_same_v<_Tp, bool>)
1153  if constexpr (_Np < 8 || (_Np & (_Np - 1)) != 0)
1154  return _MaskImpl::_S_bit_and(
1155  __x, _SimdWrapper<_Tp, _Np>(
1156  __bool_storage_member_type_t<_Np>((1ULL << _Np) - 1)));
1157  else
1158  return __x;
1159  else
1160  return _S_masked(__x._M_data);
1161  }
1162 
1163  template <typename _TV>
1164  _GLIBCXX_SIMD_INTRINSIC static constexpr _TV
1165  _S_masked(_TV __x)
1166  {
1167  using _Tp = typename _VectorTraits<_TV>::value_type;
1168  static_assert(
1169  !__is_bitmask_v<_TV>,
1170  "_VecBltnBtmsk::_S_masked cannot work on bitmasks, since it doesn't "
1171  "know the number of elements. Use _SimdWrapper<bool, N> instead.");
1172  if constexpr (_S_is_partial<_Tp>)
1173  {
1174  constexpr size_t _Np = _S_size<_Tp>;
1175  return __make_dependent_t<_TV, _CommonImpl>::_S_blend(
1176  _S_implicit_mask<_Tp>(), _SimdWrapper<_Tp, _Np>(),
1177  _SimdWrapper<_Tp, _Np>(__x));
1178  }
1179  else
1180  return __x;
1181  }
1182 
1183  template <typename _TV, typename _TVT = _VectorTraits<_TV>>
1184  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1185  __make_padding_nonzero(_TV __x)
1186  {
1187  using _Tp = typename _TVT::value_type;
1188  if constexpr (!_S_is_partial<_Tp>)
1189  return __x;
1190  else
1191  {
1192  constexpr size_t _Np = _S_size<_Tp>;
1193  if constexpr (is_integral_v<typename _TVT::value_type>)
1194  return __x
1195  | __generate_vector<_Tp, _S_full_size<_Tp>>(
1196  [](auto __i) -> _Tp {
1197  if (__i < _Np)
1198  return 0;
1199  else
1200  return 1;
1201  });
1202  else
1203  return __make_dependent_t<_TV, _CommonImpl>::_S_blend(
1204  _S_implicit_mask<_Tp>(),
1205  _SimdWrapper<_Tp, _Np>(
1206  __vector_broadcast<_S_full_size<_Tp>>(_Tp(1))),
1207  _SimdWrapper<_Tp, _Np>(__x))
1208  ._M_data;
1209  }
1210  }
1211 
1212  // }}}
1213  };
1214 
1215 //}}}
1216 // _CommonImplBuiltin {{{
1217 struct _CommonImplBuiltin
1218 {
1219  // _S_converts_via_decomposition{{{
1220  // This lists all cases where a __vector_convert needs to fall back to
1221  // conversion of individual scalars (i.e. decompose the input vector into
1222  // scalars, convert, compose output vector). In those cases, _S_masked_load &
1223  // _S_masked_store prefer to use the _S_bit_iteration implementation.
1224  template <typename _From, typename _To, size_t _ToSize>
1225  static inline constexpr bool __converts_via_decomposition_v
1226  = sizeof(_From) != sizeof(_To);
1227 
1228  // }}}
1229  // _S_load{{{
1230  template <typename _Tp, size_t _Np, size_t _Bytes = _Np * sizeof(_Tp)>
1231  _GLIBCXX_SIMD_INTRINSIC static __vector_type_t<_Tp, _Np>
1232  _S_load(const void* __p)
1233  {
1234  static_assert(_Np > 1);
1235  static_assert(_Bytes % sizeof(_Tp) == 0);
1236  using _Rp = __vector_type_t<_Tp, _Np>;
1237  if constexpr (sizeof(_Rp) == _Bytes)
1238  {
1239  _Rp __r;
1240  __builtin_memcpy(&__r, __p, _Bytes);
1241  return __r;
1242  }
1243  else
1244  {
1245 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
1246  using _Up = conditional_t<
1247  is_integral_v<_Tp>,
1248  conditional_t<_Bytes % 4 == 0,
1249  conditional_t<_Bytes % 8 == 0, long long, int>,
1250  conditional_t<_Bytes % 2 == 0, short, signed char>>,
1251  conditional_t<(_Bytes < 8 || _Np % 2 == 1 || _Np == 2), _Tp,
1252  double>>;
1253  using _V = __vector_type_t<_Up, _Np * sizeof(_Tp) / sizeof(_Up)>;
1254  if constexpr (sizeof(_V) != sizeof(_Rp))
1255  { // on i386 with 4 < _Bytes <= 8
1256  _Rp __r{};
1257  __builtin_memcpy(&__r, __p, _Bytes);
1258  return __r;
1259  }
1260  else
1261 #else // _GLIBCXX_SIMD_WORKAROUND_PR90424
1262  using _V = _Rp;
1263 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
1264  {
1265  _V __r{};
1266  static_assert(_Bytes <= sizeof(_V));
1267  __builtin_memcpy(&__r, __p, _Bytes);
1268  return reinterpret_cast<_Rp>(__r);
1269  }
1270  }
1271  }
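// Illustrative example (annotation, not part of the original header):
// loading 3 ints into a 4-element vector reads only 12 bytes and leaves
// the fourth element zero-initialized:
//   const int* __p = ...; // at least 3 readable ints assumed
//   auto __v = _S_load<int, 4, 12>(__p); // __vector_type_t<int, 4>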
1272 
1273  // }}}
1274  // _S_store {{{
1275  template <size_t _ReqBytes = 0, typename _TV>
1276  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_TV __x, void* __addr)
1277  {
1278  constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
1279  static_assert(sizeof(__x) >= _Bytes);
1280 
1281  if constexpr (__is_vector_type_v<_TV>)
1282  {
1283  using _Tp = typename _VectorTraits<_TV>::value_type;
1284  constexpr size_t _Np = _Bytes / sizeof(_Tp);
1285  static_assert(_Np * sizeof(_Tp) == _Bytes);
1286 
1287 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
1288  using _Up = conditional_t<
1289  (is_integral_v<_Tp> || _Bytes < 4),
1290  conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
1291  float>;
1292  const auto __v = __vector_bitcast<_Up>(__x);
1293 #else // _GLIBCXX_SIMD_WORKAROUND_PR90424
1294  const __vector_type_t<_Tp, _Np> __v = __x;
1295 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
1296 
1297  if constexpr ((_Bytes & (_Bytes - 1)) != 0)
1298  {
1299  constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
1300  alignas(decltype(__v)) char __tmp[_MoreBytes];
1301  __builtin_memcpy(__tmp, &__v, _MoreBytes);
1302  __builtin_memcpy(__addr, __tmp, _Bytes);
1303  }
1304  else
1305  __builtin_memcpy(__addr, &__v, _Bytes);
1306  }
1307  else
1308  __builtin_memcpy(__addr, &__x, _Bytes);
1309  }
1310 
1311  template <typename _Tp, size_t _Np>
1312  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __x,
1313  void* __addr)
1314  { _S_store<_Np * sizeof(_Tp)>(__x._M_data, __addr); }
1315 
1316  // }}}
1317  // _S_store_bool_array(_BitMask) {{{
1318  template <size_t _Np, bool _Sanitized>
1319  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1320  _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem)
1321  {
1322  if constexpr (_Np == 1)
1323  __mem[0] = __x[0];
1324  else if constexpr (_Np == 2)
1325  {
1326  short __bool2 = (__x._M_to_bits() * 0x81) & 0x0101;
1327  _S_store<_Np>(__bool2, __mem);
1328  }
1329  else if constexpr (_Np == 3)
1330  {
1331  int __bool3 = (__x._M_to_bits() * 0x4081) & 0x010101;
1332  _S_store<_Np>(__bool3, __mem);
1333  }
1334  else
1335  {
1336  __execute_n_times<__div_roundup(_Np, 4)>([&](auto __i) {
1337  constexpr int __offset = __i * 4;
1338  constexpr int __remaining = _Np - __offset;
1339  if constexpr (__remaining > 4 && __remaining <= 7)
1340  {
1341  const _ULLong __bool7
1342  = (__x.template _M_extract<__offset>()._M_to_bits()
1343  * 0x40810204081ULL)
1344  & 0x0101010101010101ULL;
1345  _S_store<__remaining>(__bool7, __mem + __offset);
1346  }
1347  else if constexpr (__remaining >= 4)
1348  {
1349  int __bits = __x.template _M_extract<__offset>()._M_to_bits();
1350  if constexpr (__remaining > 7)
1351  __bits &= 0xf;
1352  const int __bool4 = (__bits * 0x204081) & 0x01010101;
1353  _S_store<4>(__bool4, __mem + __offset);
1354  }
1355  });
1356  }
1357  }
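// Illustrative note (annotation, not part of the original header) on the
// multiply-and-mask trick used above: for 4 mask bits b3..b0,
//   (__bits * 0x204081) & 0x01010101
// spreads bit k into byte k (one bool per byte), so _S_store<4> can write
// the four bool values with a single 4-byte store.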
1358 
1359  // }}}
1360  // _S_blend{{{
1361  template <typename _Tp, size_t _Np>
1362  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1363  _S_blend(_SimdWrapper<__int_for_sizeof_t<_Tp>, _Np> __k,
1364  _SimdWrapper<_Tp, _Np> __at0, _SimdWrapper<_Tp, _Np> __at1)
1365  { return __k._M_data ? __at1._M_data : __at0._M_data; }
1366 
1367  // }}}
1368 };
1369 
1370 // }}}
1371 // _SimdImplBuiltin {{{1
1372 template <typename _Abi>
1373  struct _SimdImplBuiltin
1374  {
1375  // member types {{{2
1376  template <typename _Tp>
1377  static constexpr size_t _S_max_store_size = 16;
1378 
1379  using abi_type = _Abi;
1380 
1381  template <typename _Tp>
1382  using _TypeTag = _Tp*;
1383 
1384  template <typename _Tp>
1385  using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;
1386 
1387  template <typename _Tp>
1388  using _MaskMember = typename _Abi::template _MaskMember<_Tp>;
1389 
1390  template <typename _Tp>
1391  static constexpr size_t _S_size = _Abi::template _S_size<_Tp>;
1392 
1393  template <typename _Tp>
1394  static constexpr size_t _S_full_size = _Abi::template _S_full_size<_Tp>;
1395 
1396  using _CommonImpl = typename _Abi::_CommonImpl;
1397  using _SuperImpl = typename _Abi::_SimdImpl;
1398  using _MaskImpl = typename _Abi::_MaskImpl;
1399 
1400  // _M_make_simd(_SimdWrapper/__intrinsic_type_t) {{{2
1401  template <typename _Tp, size_t _Np>
1402  _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi>
1403  _M_make_simd(_SimdWrapper<_Tp, _Np> __x)
1404  { return {__private_init, __x}; }
1405 
1406  template <typename _Tp, size_t _Np>
1407  _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi>
1408  _M_make_simd(__intrinsic_type_t<_Tp, _Np> __x)
1409  { return {__private_init, __vector_bitcast<_Tp>(__x)}; }
1410 
1411  // _S_broadcast {{{2
1412  template <typename _Tp>
1413  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1414  _S_broadcast(_Tp __x) noexcept
1415  { return __vector_broadcast<_S_full_size<_Tp>>(__x); }
1416 
1417  // _S_generator {{{2
1418  template <typename _Fp, typename _Tp>
1419  inline static constexpr _SimdMember<_Tp> _S_generator(_Fp&& __gen,
1420  _TypeTag<_Tp>)
1421  {
1422  return __generate_vector<_Tp, _S_full_size<_Tp>>([&](
1423  auto __i) constexpr {
1424  if constexpr (__i < _S_size<_Tp>)
1425  return __gen(__i);
1426  else
1427  return 0;
1428  });
1429  }
1430 
1431  // _S_load {{{2
1432  template <typename _Tp, typename _Up>
1433  _GLIBCXX_SIMD_INTRINSIC static _SimdMember<_Tp>
1434  _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
1435  {
1436  constexpr size_t _Np = _S_size<_Tp>;
1437  constexpr size_t __max_load_size
1438  = (sizeof(_Up) >= 4 && __have_avx512f) || __have_avx512bw ? 64
1439  : (is_floating_point_v<_Up> && __have_avx) || __have_avx2 ? 32
1440  : 16;
1441  constexpr size_t __bytes_to_load = sizeof(_Up) * _Np;
1442  if constexpr (sizeof(_Up) > 8)
1443  return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>([&](
1444  auto __i) constexpr {
1445  return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
1446  });
1447  else if constexpr (is_same_v<_Up, _Tp>)
1448  return _CommonImpl::template _S_load<_Tp, _S_full_size<_Tp>,
1449  _Np * sizeof(_Tp)>(__mem);
1450  else if constexpr (__bytes_to_load <= __max_load_size)
1451  return __convert<_SimdMember<_Tp>>(
1452  _CommonImpl::template _S_load<_Up, _Np>(__mem));
1453  else if constexpr (__bytes_to_load % __max_load_size == 0)
1454  {
1455  constexpr size_t __n_loads = __bytes_to_load / __max_load_size;
1456  constexpr size_t __elements_per_load = _Np / __n_loads;
1457  return __call_with_n_evaluations<__n_loads>(
1458  [](auto... __uncvted) {
1459  return __convert<_SimdMember<_Tp>>(__uncvted...);
1460  },
1461  [&](auto __i) {
1462  return _CommonImpl::template _S_load<_Up, __elements_per_load>(
1463  __mem + __i * __elements_per_load);
1464  });
1465  }
1466  else if constexpr (__bytes_to_load % (__max_load_size / 2) == 0
1467  && __max_load_size > 16)
1468  { // e.g. int[] -> <char, 12> with AVX2
1469  constexpr size_t __n_loads
1470  = __bytes_to_load / (__max_load_size / 2);
1471  constexpr size_t __elements_per_load = _Np / __n_loads;
1472  return __call_with_n_evaluations<__n_loads>(
1473  [](auto... __uncvted) {
1474  return __convert<_SimdMember<_Tp>>(__uncvted...);
1475  },
1476  [&](auto __i) {
1477  return _CommonImpl::template _S_load<_Up, __elements_per_load>(
1478  __mem + __i * __elements_per_load);
1479  });
1480  }
1481  else // e.g. int[] -> <char, 9>
1482  return __call_with_subscripts(
1483  __mem, make_index_sequence<_Np>(), [](auto... __args) {
1484  return __vector_type_t<_Tp, _S_full_size<_Tp>>{
1485  static_cast<_Tp>(__args)...};
1486  });
1487  }
1488 
1489  // _S_masked_load {{{2
1490  template <typename _Tp, size_t _Np, typename _Up>
1491  static inline _SimdWrapper<_Tp, _Np>
1492  _S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k,
1493  const _Up* __mem) noexcept
1494  {
1495  _BitOps::_S_bit_iteration(_MaskImpl::_S_to_bits(__k), [&](auto __i) {
1496  __merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
1497  });
1498  return __merge;
1499  }
1500 
1501  // _S_store {{{2
1502  template <typename _Tp, typename _Up>
1503  _GLIBCXX_SIMD_INTRINSIC static void
1504  _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept
1505  {
1506  // TODO: converting int -> "smaller int" can be optimized with AVX512
1507  constexpr size_t _Np = _S_size<_Tp>;
1508  constexpr size_t __max_store_size
1509  = _SuperImpl::template _S_max_store_size<_Up>;
1510  if constexpr (sizeof(_Up) > 8)
1511  __execute_n_times<_Np>([&](auto __i) constexpr {
1512  __mem[__i] = __v[__i];
1513  });
1514  else if constexpr (is_same_v<_Up, _Tp>)
1515  _CommonImpl::_S_store(__v, __mem);
1516  else if constexpr (sizeof(_Up) * _Np <= __max_store_size)
1517  _CommonImpl::_S_store(_SimdWrapper<_Up, _Np>(__convert<_Up>(__v)),
1518  __mem);
1519  else
1520  {
1521  constexpr size_t __vsize = __max_store_size / sizeof(_Up);
1522  // round up to convert the last partial vector as well:
1523  constexpr size_t __stores = __div_roundup(_Np, __vsize);
1524  constexpr size_t __full_stores = _Np / __vsize;
1525  using _V = __vector_type_t<_Up, __vsize>;
1526  const array<_V, __stores> __converted
1527  = __convert_all<_V, __stores>(__v);
1528  __execute_n_times<__full_stores>([&](auto __i) constexpr {
1529  _CommonImpl::_S_store(__converted[__i], __mem + __i * __vsize);
1530  });
1531  if constexpr (__full_stores < __stores)
1532  _CommonImpl::template _S_store<(_Np - __full_stores * __vsize)
1533  * sizeof(_Up)>(
1534  __converted[__full_stores], __mem + __full_stores * __vsize);
1535  }
1536  }
1537 
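  // The converting store mirrors the load: convert into native-width chunks,
  // store the full chunks, and finish with one partial store covering the
  // remaining elements. Worked numbers for one case, as a sketch (12 float
  // stored as 12 short, 16-Byte maximum store size, sizeof(short) == 2):
  //   __vsize = 16 / 2 = 8, __stores = ceil(12 / 8) = 2, __full_stores = 1,
  //   and the trailing partial store covers (12 - 8) * 2 = 8 Bytes.
  static_assert((12 + 8 - 1) / 8 == 2 && 12 / 8 == 1 && (12 - 8) * 2 == 8,
		"one full 16-Byte store plus one 8-Byte partial store");
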
1538  // _S_masked_store_nocvt {{{2
1539  template <typename _Tp, size_t _Np>
1540  _GLIBCXX_SIMD_INTRINSIC static void
1541  _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
1542  _MaskMember<_Tp> __k)
1543  {
1544  _BitOps::_S_bit_iteration(
1545  _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
1546  __mem[__i] = __v[__i];
1547  });
1548  }
1549 
1550  // _S_masked_store {{{2
1551  template <typename _TW, typename _TVT = _VectorTraits<_TW>,
1552  typename _Tp = typename _TVT::value_type, typename _Up>
1553  static inline void
1554  _S_masked_store(const _TW __v, _Up* __mem, const _MaskMember<_Tp> __k)
1555  noexcept
1556  {
1557  constexpr size_t _TV_size = _S_size<_Tp>;
1558  [[maybe_unused]] const auto __vi = __to_intrin(__v);
1559  constexpr size_t __max_store_size
1560  = _SuperImpl::template _S_max_store_size<_Up>;
1561  if constexpr (
1562  is_same_v<
1563  _Tp,
1564  _Up> || (is_integral_v<_Tp> && is_integral_v<_Up> && sizeof(_Tp) == sizeof(_Up)))
1565  {
1566  // no conversion, or only a bitwise reinterpretation of same-size integers:
1567  const _MaskMember<_Up> __kk = [&]() {
1568  if constexpr (__is_bitmask_v<decltype(__k)>)
1569  return _MaskMember<_Up>(__k._M_data);
1570  else
1571  return __wrapper_bitcast<__int_for_sizeof_t<_Up>>(__k);
1572  }();
1573  _SuperImpl::_S_masked_store_nocvt(__wrapper_bitcast<_Up>(__v),
1574  __mem, __kk);
1575  }
1576  else if constexpr (__vectorized_sizeof<_Up>() > sizeof(_Up)
1577  && !_CommonImpl::
1578  template __converts_via_decomposition_v<
1579  _Tp, _Up, __max_store_size>)
1580  { // a conversion via decomposition is better handled by the
1581  // bit_iteration fallback below (hence the check in the
1582  // condition above)
1583  constexpr size_t _UW_size
1584  = std::min(_TV_size, __max_store_size / sizeof(_Up));
1585  static_assert(_UW_size <= _TV_size);
1586  using _UW = _SimdWrapper<_Up, _UW_size>;
1587  using _UV = __vector_type_t<_Up, _UW_size>;
1588  using _UAbi = simd_abi::deduce_t<_Up, _UW_size>;
1589  if constexpr (_UW_size == _TV_size) // one convert+store
1590  {
1591  const _UW __converted = __convert<_UW>(__v);
1592  _SuperImpl::_S_masked_store_nocvt(
1593  __converted, __mem,
1594  _UAbi::_MaskImpl::template _S_convert<
1595  __int_for_sizeof_t<_Up>>(__k));
1596  }
1597  else
1598  {
1599  static_assert(_UW_size * sizeof(_Up) == __max_store_size);
1600  constexpr size_t _NFullStores = _TV_size / _UW_size;
1601  constexpr size_t _NAllStores
1602  = __div_roundup(_TV_size, _UW_size);
1603  constexpr size_t _NParts = _S_full_size<_Tp> / _UW_size;
1604  const array<_UV, _NAllStores> __converted
1605  = __convert_all<_UV, _NAllStores>(__v);
1606  __execute_n_times<_NFullStores>([&](auto __i) {
1607  _SuperImpl::_S_masked_store_nocvt(
1608  _UW(__converted[__i]), __mem + __i * _UW_size,
1609  _UAbi::_MaskImpl::template _S_convert<
1610  __int_for_sizeof_t<_Up>>(
1611  __extract_part<__i, _NParts>(__k.__as_full_vector())));
1612  });
1613  if constexpr (_NAllStores
1614  > _NFullStores) // one partial at the end
1615  _SuperImpl::_S_masked_store_nocvt(
1616  _UW(__converted[_NFullStores]),
1617  __mem + _NFullStores * _UW_size,
1618  _UAbi::_MaskImpl::template _S_convert<
1619  __int_for_sizeof_t<_Up>>(
1620  __extract_part<_NFullStores, _NParts>(
1621  __k.__as_full_vector())));
1622  }
1623  }
1624  else
1625  _BitOps::_S_bit_iteration(
1626  _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
1627  __mem[__i] = static_cast<_Up>(__v[__i]);
1628  });
1629  }
1630 
1631  // _S_complement {{{2
1632  template <typename _Tp, size_t _Np>
1633  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1634  _S_complement(_SimdWrapper<_Tp, _Np> __x) noexcept
1635  { return ~__x._M_data; }
1636 
1637  // _S_unary_minus {{{2
1638  template <typename _Tp, size_t _Np>
1639  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1640  _S_unary_minus(_SimdWrapper<_Tp, _Np> __x) noexcept
1641  {
1642  // GCC doesn't use the psign instructions, but pxor & psub seem to be
1643  // just as good a choice as pcmpeqd & psign. So meh.
1644  return -__x._M_data;
1645  }
1646 
1647  // arithmetic operators {{{2
1648  template <typename _Tp, size_t _Np>
1649  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1650  _S_plus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1651  { return __x._M_data + __y._M_data; }
1652 
1653  template <typename _Tp, size_t _Np>
1654  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1655  _S_minus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1656  { return __x._M_data - __y._M_data; }
1657 
1658  template <typename _Tp, size_t _Np>
1659  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1660  _S_multiplies(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1661  { return __x._M_data * __y._M_data; }
1662 
1663  template <typename _Tp, size_t _Np>
1664  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1665  _S_divides(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1666  {
1667  // Note that division by 0 is always UB, so we must ensure we avoid the
1668  // case for partial registers
1669  if constexpr (!_Abi::template _S_is_partial<_Tp>)
1670  return __x._M_data / __y._M_data;
1671  else
1672  return __x._M_data / _Abi::__make_padding_nonzero(__y._M_data);
1673  }
1674 
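  // Why the divisor padding must be nonzero: with e.g. 3 elements kept in a
  // 4-lane register, lane 3 of __y is zero-initialized padding, and letting
  // the hardware divide by it would be undefined behaviour. Forcing that lane
  // to any nonzero value produces a garbage quotient in the padding lane,
  // which is never observed. The same idea in scalar form (an illustrative
  // helper only, not used by the implementation):
  template <typename _Tp>
    static constexpr void
    _S_divides_with_padding_sketch(const _Tp (&__x)[4], const _Tp (&__y)[4],
				   _Tp (&__r)[4])
    {
      for (int __i = 0; __i < 4; ++__i)
	// lanes 0..2 carry data; lane 3 divides by 1 instead of the padding
	__r[__i] = __x[__i] / (__i < 3 ? __y[__i] : _Tp(1));
    }
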
1675  template <typename _Tp, size_t _Np>
1676  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1677  _S_modulus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1678  {
1679  if constexpr (!_Abi::template _S_is_partial<_Tp>)
1680  return __x._M_data % __y._M_data;
1681  else
1682  return __as_vector(__x)
1683  % _Abi::__make_padding_nonzero(__as_vector(__y));
1684  }
1685 
1686  template <typename _Tp, size_t _Np>
1687  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1688  _S_bit_and(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1689  { return __and(__x, __y); }
1690 
1691  template <typename _Tp, size_t _Np>
1692  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1693  _S_bit_or(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1694  { return __or(__x, __y); }
1695 
1696  template <typename _Tp, size_t _Np>
1697  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1698  _S_bit_xor(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1699  { return __xor(__x, __y); }
1700 
1701  template <typename _Tp, size_t _Np>
1702  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
1703  _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1704  { return __x._M_data << __y._M_data; }
1705 
1706  template <typename _Tp, size_t _Np>
1707  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
1708  _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1709  { return __x._M_data >> __y._M_data; }
1710 
1711  template <typename _Tp, size_t _Np>
1712  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1713  _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
1714  { return __x._M_data << __y; }
1715 
1716  template <typename _Tp, size_t _Np>
1717  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1718  _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
1719  { return __x._M_data >> __y; }
1720 
1721  // compares {{{2
1722  // _S_equal_to {{{3
1723  template <typename _Tp, size_t _Np>
1724  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1725  _S_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1726  { return __x._M_data == __y._M_data; }
1727 
1728  // _S_not_equal_to {{{3
1729  template <typename _Tp, size_t _Np>
1730  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1731  _S_not_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1732  { return __x._M_data != __y._M_data; }
1733 
1734  // _S_less {{{3
1735  template <typename _Tp, size_t _Np>
1736  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1737  _S_less(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1738  { return __x._M_data < __y._M_data; }
1739 
1740  // _S_less_equal {{{3
1741  template <typename _Tp, size_t _Np>
1742  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1743  _S_less_equal(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1744  { return __x._M_data <= __y._M_data; }
1745 
1746  // _S_negate {{{2
1747  template <typename _Tp, size_t _Np>
1748  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1749  _S_negate(_SimdWrapper<_Tp, _Np> __x) noexcept
1750  { return !__x._M_data; }
1751 
1752  // _S_min, _S_max, _S_minmax {{{2
1753  template <typename _Tp, size_t _Np>
1754  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1755  _SimdWrapper<_Tp, _Np>
1756  _S_min(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1757  { return __a._M_data < __b._M_data ? __a._M_data : __b._M_data; }
1758 
1759  template <typename _Tp, size_t _Np>
1760  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1761  _SimdWrapper<_Tp, _Np>
1762  _S_max(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1763  { return __a._M_data > __b._M_data ? __a._M_data : __b._M_data; }
1764 
1765  template <typename _Tp, size_t _Np>
1766  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1767  pair<_SimdWrapper<_Tp, _Np>, _SimdWrapper<_Tp, _Np>>
1768  _S_minmax(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1769  {
1770  return {__a._M_data < __b._M_data ? __a._M_data : __b._M_data,
1771  __a._M_data < __b._M_data ? __b._M_data : __a._M_data};
1772  }
1773 
1774  // reductions {{{2
1775  template <size_t _Np, size_t... _Is, size_t... _Zeros, typename _Tp,
1776  typename _BinaryOperation>
1777  _GLIBCXX_SIMD_INTRINSIC static _Tp
1778  _S_reduce_partial(index_sequence<_Is...>, index_sequence<_Zeros...>,
1779  simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
1780  {
1781  using _V = __vector_type_t<_Tp, _Np / 2>;
1782  static_assert(sizeof(_V) <= sizeof(__x));
1783  // _S_full_size is the size of the smallest native SIMD register that
1784  // can store _Np/2 elements:
1785  using _FullSimd = __deduced_simd<_Tp, _VectorTraits<_V>::_S_full_size>;
1786  using _HalfSimd = __deduced_simd<_Tp, _Np / 2>;
1787  const auto __xx = __as_vector(__x);
1788  return _HalfSimd::abi_type::_SimdImpl::_S_reduce(
1789  static_cast<_HalfSimd>(__as_vector(__binary_op(
1790  static_cast<_FullSimd>(__intrin_bitcast<_V>(__xx)),
1791  static_cast<_FullSimd>(__intrin_bitcast<_V>(
1792  __vector_permute<(_Np / 2 + _Is)..., (int(_Zeros * 0) - 1)...>(
1793  __xx)))))),
1794  __binary_op);
1795  }
1796 
1797  template <typename _Tp, typename _BinaryOperation>
1798  _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
1799  _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
1800  {
1801  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
1802  if constexpr (_Np == 1)
1803  return __x[0];
1804  else if constexpr (_Np == 2)
1805  return __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]),
1806  simd<_Tp, simd_abi::scalar>(__x[1]))[0];
1807  else if constexpr (_Abi::template _S_is_partial<_Tp>) //{{{
1808  {
1809  [[maybe_unused]] constexpr auto __full_size
1810  = _Abi::template _S_full_size<_Tp>;
1811  if constexpr (_Np == 3)
1812  return __binary_op(
1813  __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]),
1814  simd<_Tp, simd_abi::scalar>(__x[1])),
1815  simd<_Tp, simd_abi::scalar>(__x[2]))[0];
1816  else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
1817  plus<>>)
1818  {
1819  using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
1820  return _Ap::_SimdImpl::_S_reduce(
1821  simd<_Tp, _Ap>(__private_init,
1822  _Abi::_S_masked(__as_vector(__x))),
1823  __binary_op);
1824  }
1825  else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
1826  multiplies<>>)
1827  {
1828  using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
1829  using _TW = _SimdWrapper<_Tp, __full_size>;
1830  _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask_full
1831  = _Abi::template _S_implicit_mask<_Tp>().__as_full_vector();
1832  _GLIBCXX_SIMD_USE_CONSTEXPR _TW __one
1833  = __vector_broadcast<__full_size>(_Tp(1));
1834  const _TW __x_full = __data(__x).__as_full_vector();
1835  const _TW __x_padded_with_ones
1836  = _Ap::_CommonImpl::_S_blend(__implicit_mask_full, __one,
1837  __x_full);
1838  return _Ap::_SimdImpl::_S_reduce(
1839  simd<_Tp, _Ap>(__private_init, __x_padded_with_ones),
1840  __binary_op);
1841  }
1842  else if constexpr (_Np & 1)
1843  {
1844  using _Ap = simd_abi::deduce_t<_Tp, _Np - 1>;
1845  return __binary_op(
1846  simd<_Tp, simd_abi::scalar>(_Ap::_SimdImpl::_S_reduce(
1847  simd<_Tp, _Ap>(
1848  __intrin_bitcast<__vector_type_t<_Tp, _Np - 1>>(
1849  __as_vector(__x))),
1850  __binary_op)),
1851  simd<_Tp, simd_abi::scalar>(__x[_Np - 1]))[0];
1852  }
1853  else
1854  return _S_reduce_partial<_Np>(
1855  make_index_sequence<_Np / 2>(),
1856  make_index_sequence<__full_size - _Np / 2>(), __x, __binary_op);
1857  } //}}}
1858  else if constexpr (sizeof(__x) == 16) //{{{
1859  {
1860  if constexpr (_Np == 16)
1861  {
1862  const auto __y = __data(__x);
1863  __x = __binary_op(
1864  _M_make_simd<_Tp, _Np>(
1865  __vector_permute<0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
1866  7, 7>(__y)),
1867  _M_make_simd<_Tp, _Np>(
1868  __vector_permute<8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1869  14, 14, 15, 15>(__y)));
1870  }
1871  if constexpr (_Np >= 8)
1872  {
1873  const auto __y = __vector_bitcast<short>(__data(__x));
1874  __x = __binary_op(
1875  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1876  __vector_permute<0, 0, 1, 1, 2, 2, 3, 3>(__y))),
1877  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1878  __vector_permute<4, 4, 5, 5, 6, 6, 7, 7>(__y))));
1879  }
1880  if constexpr (_Np >= 4)
1881  {
1882  using _Up = conditional_t<is_floating_point_v<_Tp>, float, int>;
1883  const auto __y = __vector_bitcast<_Up>(__data(__x));
1884  __x = __binary_op(__x,
1885  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1886  __vector_permute<3, 2, 1, 0>(__y))));
1887  }
1888  using _Up = conditional_t<is_floating_point_v<_Tp>, double, _LLong>;
1889  const auto __y = __vector_bitcast<_Up>(__data(__x));
1890  __x = __binary_op(__x, _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1891  __vector_permute<1, 1>(__y))));
1892  return __x[0];
1893  } //}}}
1894  else
1895  {
1896  static_assert(sizeof(__x) > __min_vector_size<_Tp>);
1897  static_assert((_Np & (_Np - 1)) == 0); // _Np must be a power of 2
1898  using _Ap = simd_abi::deduce_t<_Tp, _Np / 2>;
1899  using _V = simd<_Tp, _Ap>;
1900  return _Ap::_SimdImpl::_S_reduce(
1901  __binary_op(_V(__private_init, __extract<0, 2>(__as_vector(__x))),
1902  _V(__private_init,
1903  __extract<1, 2>(__as_vector(__x)))),
1904  static_cast<_BinaryOperation&&>(__binary_op));
1905  }
1906  }
1907 
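  // The full-register branches of _S_reduce above implement a log2(_Np) tree
  // reduction: permute the upper half onto the lower half, combine the two
  // halves element-wise with __binary_op, and recurse on the half-sized
  // result. The same algorithm over a plain array, as an illustrative sketch
  // (assumes _Np is a power of two; not used by the implementation):
  template <typename _Tp, size_t _Np, typename _BinaryOperation>
    static constexpr _Tp
    _S_tree_reduce_sketch(_Tp (&__arr)[_Np], _BinaryOperation __binary_op)
    {
      for (size_t __width = _Np / 2; __width > 0; __width /= 2)
	for (size_t __i = 0; __i < __width; ++__i)
	  __arr[__i] = __binary_op(__arr[__i], __arr[__i + __width]);
      return __arr[0];
    }
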
1908  // math {{{2
1909  // frexp, modf and copysign implemented in simd_math.h
1910 #define _GLIBCXX_SIMD_MATH_FALLBACK(__name) \
1911  template <typename _Tp, typename... _More> \
1912  static _Tp _S_##__name(const _Tp& __x, const _More&... __more) \
1913  { \
1914  return __generate_vector<_Tp>( \
1915  [&](auto __i) { return __name(__x[__i], __more[__i]...); }); \
1916  }
1917 
1918 #define _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET(__name) \
1919  template <typename _Tp, typename... _More> \
1920  static typename _Tp::mask_type _S_##__name(const _Tp& __x, \
1921  const _More&... __more) \
1922  { \
1923  return __generate_vector<_Tp>( \
1924  [&](auto __i) { return __name(__x[__i], __more[__i]...); }); \
1925  }
1926 
1927 #define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name) \
1928  template <typename _Tp, typename... _More> \
1929  static auto _S_##__name(const _Tp& __x, const _More&... __more) \
1930  { \
1931  return __fixed_size_storage_t<_RetTp, \
1932  _VectorTraits<_Tp>::_S_partial_width>:: \
1933  _S_generate([&](auto __meta) constexpr { \
1934  return __meta._S_generator( \
1935  [&](auto __i) { \
1936  return __name(__x[__meta._S_offset + __i], \
1937  __more[__meta._S_offset + __i]...); \
1938  }, \
1939  static_cast<_RetTp*>(nullptr)); \
1940  }); \
1941  }
1942 
1943  _GLIBCXX_SIMD_MATH_FALLBACK(acos)
1944  _GLIBCXX_SIMD_MATH_FALLBACK(asin)
1945  _GLIBCXX_SIMD_MATH_FALLBACK(atan)
1946  _GLIBCXX_SIMD_MATH_FALLBACK(atan2)
1947  _GLIBCXX_SIMD_MATH_FALLBACK(cos)
1948  _GLIBCXX_SIMD_MATH_FALLBACK(sin)
1949  _GLIBCXX_SIMD_MATH_FALLBACK(tan)
1950  _GLIBCXX_SIMD_MATH_FALLBACK(acosh)
1951  _GLIBCXX_SIMD_MATH_FALLBACK(asinh)
1952  _GLIBCXX_SIMD_MATH_FALLBACK(atanh)
1953  _GLIBCXX_SIMD_MATH_FALLBACK(cosh)
1954  _GLIBCXX_SIMD_MATH_FALLBACK(sinh)
1955  _GLIBCXX_SIMD_MATH_FALLBACK(tanh)
1956  _GLIBCXX_SIMD_MATH_FALLBACK(exp)
1957  _GLIBCXX_SIMD_MATH_FALLBACK(exp2)
1958  _GLIBCXX_SIMD_MATH_FALLBACK(expm1)
1959  _GLIBCXX_SIMD_MATH_FALLBACK(ldexp)
1960  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb)
1961  _GLIBCXX_SIMD_MATH_FALLBACK(log)
1962  _GLIBCXX_SIMD_MATH_FALLBACK(log10)
1963  _GLIBCXX_SIMD_MATH_FALLBACK(log1p)
1964  _GLIBCXX_SIMD_MATH_FALLBACK(log2)
1965  _GLIBCXX_SIMD_MATH_FALLBACK(logb)
1966 
1967  // modf implemented in simd_math.h
1968  _GLIBCXX_SIMD_MATH_FALLBACK(scalbn)
1969  _GLIBCXX_SIMD_MATH_FALLBACK(scalbln)
1970  _GLIBCXX_SIMD_MATH_FALLBACK(cbrt)
1971  _GLIBCXX_SIMD_MATH_FALLBACK(fabs)
1972  _GLIBCXX_SIMD_MATH_FALLBACK(pow)
1973  _GLIBCXX_SIMD_MATH_FALLBACK(sqrt)
1974  _GLIBCXX_SIMD_MATH_FALLBACK(erf)
1975  _GLIBCXX_SIMD_MATH_FALLBACK(erfc)
1976  _GLIBCXX_SIMD_MATH_FALLBACK(lgamma)
1977  _GLIBCXX_SIMD_MATH_FALLBACK(tgamma)
1978 
1979  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint)
1980  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint)
1981 
1982  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround)
1983  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround)
1984 
1985  _GLIBCXX_SIMD_MATH_FALLBACK(fmod)
1986  _GLIBCXX_SIMD_MATH_FALLBACK(remainder)
1987 
1988  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1989  static _Tp
1990  _S_remquo(const _Tp __x, const _Tp __y,
1991  __fixed_size_storage_t<int, _TVT::_S_partial_width>* __z)
1992  {
1993  return __generate_vector<_Tp>([&](auto __i) {
1994  int __tmp;
1995  auto __r = remquo(__x[__i], __y[__i], &__tmp);
1996  __z->_M_set(__i, __tmp);
1997  return __r;
1998  });
1999  }
2000 
2001  // copysign in simd_math.h
2002  _GLIBCXX_SIMD_MATH_FALLBACK(nextafter)
2003  _GLIBCXX_SIMD_MATH_FALLBACK(fdim)
2004  _GLIBCXX_SIMD_MATH_FALLBACK(fmax)
2005  _GLIBCXX_SIMD_MATH_FALLBACK(fmin)
2006  _GLIBCXX_SIMD_MATH_FALLBACK(fma)
2007 
2008  template <typename _Tp, size_t _Np>
2009  static constexpr _MaskMember<_Tp>
2010  _S_isgreater(_SimdWrapper<_Tp, _Np> __x,
2011  _SimdWrapper<_Tp, _Np> __y) noexcept
2012  {
2013  using _Ip = __int_for_sizeof_t<_Tp>;
2014  const auto __xn = __vector_bitcast<_Ip>(__x);
2015  const auto __yn = __vector_bitcast<_Ip>(__y);
2016  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2017  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2018  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2019  __xp > __yp);
2020  }
2021 
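  // _S_isgreater above and the ordered comparisons below rely on the fact
  // that IEC 559 floats order like sign-magnitude integers: for non-negative
  // values the signed-integer view is already ordered correctly, and negative
  // values are mapped to -(bits & finite_max) so that "more negative" becomes
  // "smaller integer". NaNs are excluded separately via _S_isunordered.
  // Scalar sketch (assumes 32-bit float and int; illustrative only):
  static bool
  _S_isgreater_sketch(float __a, float __b)
  {
    int __an, __bn;
    __builtin_memcpy(&__an, &__a, sizeof(int));
    __builtin_memcpy(&__bn, &__b, sizeof(int));
    const int __ap = __an < 0 ? -(__an & __INT_MAX__) : __an;
    const int __bp = __bn < 0 ? -(__bn & __INT_MAX__) : __bn;
    return __a == __a && __b == __b && __ap > __bp; // false if either is NaN
  }
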
2022  template <typename _Tp, size_t _Np>
2023  static constexpr _MaskMember<_Tp>
2024  _S_isgreaterequal(_SimdWrapper<_Tp, _Np> __x,
2025  _SimdWrapper<_Tp, _Np> __y) noexcept
2026  {
2027  using _Ip = __int_for_sizeof_t<_Tp>;
2028  const auto __xn = __vector_bitcast<_Ip>(__x);
2029  const auto __yn = __vector_bitcast<_Ip>(__y);
2030  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2031  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2032  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2033  __xp >= __yp);
2034  }
2035 
2036  template <typename _Tp, size_t _Np>
2037  static constexpr _MaskMember<_Tp>
2038  _S_isless(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) noexcept
2039  {
2040  using _Ip = __int_for_sizeof_t<_Tp>;
2041  const auto __xn = __vector_bitcast<_Ip>(__x);
2042  const auto __yn = __vector_bitcast<_Ip>(__y);
2043  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2044  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2045  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2046  __xp < __yp);
2047  }
2048 
2049  template <typename _Tp, size_t _Np>
2050  static constexpr _MaskMember<_Tp>
2051  _S_islessequal(_SimdWrapper<_Tp, _Np> __x,
2052  _SimdWrapper<_Tp, _Np> __y) noexcept
2053  {
2054  using _Ip = __int_for_sizeof_t<_Tp>;
2055  const auto __xn = __vector_bitcast<_Ip>(__x);
2056  const auto __yn = __vector_bitcast<_Ip>(__y);
2057  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2058  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2059  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2060  __xp <= __yp);
2061  }
2062 
2063  template <typename _Tp, size_t _Np>
2064  static constexpr _MaskMember<_Tp>
2065  _S_islessgreater(_SimdWrapper<_Tp, _Np> __x,
2066  _SimdWrapper<_Tp, _Np> __y) noexcept
2067  {
2068  return __andnot(_SuperImpl::_S_isunordered(__x, __y),
2069  _SuperImpl::_S_not_equal_to(__x, __y));
2070  }
2071 
2072 #undef _GLIBCXX_SIMD_MATH_FALLBACK
2073 #undef _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET
2074 #undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET
2075  // _S_abs {{{3
2076  template <typename _Tp, size_t _Np>
2077  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2078  _S_abs(_SimdWrapper<_Tp, _Np> __x) noexcept
2079  {
2080  // if (__builtin_is_constant_evaluated())
2081  // {
2082  // return __x._M_data < 0 ? -__x._M_data : __x._M_data;
2083  // }
2084  if constexpr (is_floating_point_v<_Tp>)
2085  // `v < 0 ? -v : v` cannot compile to the efficient implementation of
2086  // masking the signbit off because it must consider v == -0
2087 
2088  // ~(-0.) & v would be easy, but breaks with -fno-signed-zeros
2089  return __and(_S_absmask<__vector_type_t<_Tp, _Np>>, __x._M_data);
2090  else
2091  return __x._M_data < 0 ? -__x._M_data : __x._M_data;
2092  }
2093 
2094  // }}}3
2095  // _S_plus_minus {{{
2096  // Returns __x + __y - __y without -fassociative-math optimizing to __x.
2097  // - _TV must be __vector_type_t<floating-point type, N>.
2098  // - _UV must be _TV or floating-point type.
2099  template <typename _TV, typename _UV>
2100  _GLIBCXX_SIMD_INTRINSIC static constexpr _TV _S_plus_minus(_TV __x,
2101  _UV __y) noexcept
2102  {
2103  #if defined __i386__ && !defined __SSE_MATH__
2104  if constexpr (sizeof(__x) == 8)
2105  { // operations on __x would use the FPU
2106  static_assert(is_same_v<_TV, __vector_type_t<float, 2>>);
2107  const auto __x4 = __vector_bitcast<float, 4>(__x);
2108  if constexpr (is_same_v<_TV, _UV>)
2109  return __vector_bitcast<float, 2>(
2110  _S_plus_minus(__x4, __vector_bitcast<float, 4>(__y)));
2111  else
2112  return __vector_bitcast<float, 2>(_S_plus_minus(__x4, __y));
2113  }
2114  #endif
2115  #if !defined __clang__ && __GCC_IEC_559 == 0
2116  if (__builtin_is_constant_evaluated()
2117  || (__builtin_constant_p(__x) && __builtin_constant_p(__y)))
2118  return (__x + __y) - __y;
2119  else
2120  return [&] {
2121  __x += __y;
2122  if constexpr(__have_sse)
2123  {
2124  if constexpr (sizeof(__x) >= 16)
2125  asm("" : "+x"(__x));
2126  else if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
2127  asm("" : "+x"(__x[0]), "+x"(__x[1]));
2128  else
2129  __assert_unreachable<_TV>();
2130  }
2131  else if constexpr(__have_neon)
2132  asm("" : "+w"(__x));
2133  else if constexpr (__have_power_vmx)
2134  {
2135  if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
2136  asm("" : "+fgr"(__x[0]), "+fgr"(__x[1]));
2137  else
2138  asm("" : "+v"(__x));
2139  }
2140  else
2141  asm("" : "+g"(__x));
2142  return __x - __y;
2143  }();
2144  #else
2145  return (__x + __y) - __y;
2146  #endif
2147  }
2148 
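  // The empty asm statements above only tie the intermediate sum to a
  // register; they make __x + __y opaque to the optimizer, so the following
  // subtraction of __y cannot be re-associated into a plain __x under
  // -fassociative-math. The same idea in scalar form (a sketch assuming x86
  // with SSE math and GCC extended asm; not used by the implementation):
  static float
  _S_plus_minus_sketch(float __x, float __y)
  {
    float __sum = __x + __y; // rounded; low bits of __x may be discarded
    asm("" : "+x"(__sum));   // optimization barrier: keep the rounded sum
    return __sum - __y;      // __x rounded to the granularity around __sum
  }
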
2149  // }}}
2150  // _S_nearbyint {{{3
2151  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2152  _GLIBCXX_SIMD_INTRINSIC static _Tp _S_nearbyint(_Tp __x_) noexcept
2153  {
2154  using value_type = typename _TVT::value_type;
2155  using _V = typename _TVT::type;
2156  const _V __x = __x_;
2157  const _V __absx = __and(__x, _S_absmask<_V>);
2158  static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<value_type>);
2159  _GLIBCXX_SIMD_USE_CONSTEXPR _V __shifter_abs
2160  = _V() + (1ull << (__digits_v<value_type> - 1));
2161  const _V __shifter = __or(__and(_S_signmask<_V>, __x), __shifter_abs);
2162  const _V __shifted = _S_plus_minus(__x, __shifter);
2163  return __absx < __shifter_abs ? __shifted : __x;
2164  }
2165 
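  // The "shifter" trick above, for float: __digits_v<float> is 24, so the
  // shifter is 2^23. For |x| < 2^23, adding the sign-adjusted shifter
  // discards all fractional bits under the current rounding mode, and
  // subtracting it again leaves the rounded value; larger magnitudes are
  // already integral. Scalar sketch (float only, illustrative; the real code
  // routes the add/sub through _S_plus_minus to keep it from being folded):
  static float
  _S_nearbyint_sketch(float __x)
  {
    constexpr float __shifter = 0x1p23f;      // 1 << (__digits_v<float> - 1)
    if (!(__builtin_fabsf(__x) < __shifter))
      return __x;                             // already integral (or NaN)
    const float __s = __builtin_copysignf(__shifter, __x);
    return (__x + __s) - __s;
  }
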
2166  // _S_rint {{{3
2167  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2168  _GLIBCXX_SIMD_INTRINSIC static _Tp _S_rint(_Tp __x) noexcept
2169  {
2170  return _SuperImpl::_S_nearbyint(__x);
2171  }
2172 
2173  // _S_trunc {{{3
2174  template <typename _Tp, size_t _Np>
2175  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2176  _S_trunc(_SimdWrapper<_Tp, _Np> __x)
2177  {
2178  using _V = __vector_type_t<_Tp, _Np>;
2179  const _V __absx = __and(__x._M_data, _S_absmask<_V>);
2180  static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<_Tp>);
2181  constexpr _Tp __shifter = 1ull << (__digits_v<_Tp> - 1);
2182  _V __truncated = _S_plus_minus(__absx, __shifter);
2183  __truncated -= __truncated > __absx ? _V() + 1 : _V();
2184  return __absx < __shifter ? __or(__xor(__absx, __x._M_data), __truncated)
2185  : __x._M_data;
2186  }
2187 
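  // _S_trunc above reuses the shifter trick on |x|: round |x| to an integer,
  // subtract one where the rounding went up past |x|, and finally restore the
  // sign (__xor(__absx, __x) isolates the sign bit, __or puts it back).
  // Scalar sketch (float only, illustrative):
  static float
  _S_trunc_sketch(float __x)
  {
    const float __absx = __builtin_fabsf(__x);
    if (!(__absx < 0x1p23f))
      return __x;                              // already integral (or NaN)
    float __t = (__absx + 0x1p23f) - 0x1p23f;  // nearest integer to |x|
    if (__t > __absx)
      __t -= 1;                                // undo a round-up
    return __builtin_copysignf(__t, __x);
  }
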
2188  // _S_round {{{3
2189  template <typename _Tp, size_t _Np>
2190  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2191  _S_round(_SimdWrapper<_Tp, _Np> __x)
2192  {
2193  const auto __abs_x = _SuperImpl::_S_abs(__x);
2194  const auto __t_abs = _SuperImpl::_S_trunc(__abs_x)._M_data;
2195  const auto __r_abs // round(abs(x)) =
2196  = __t_abs + (__abs_x._M_data - __t_abs >= _Tp(.5) ? _Tp(1) : 0);
2197  return __or(__xor(__abs_x._M_data, __x._M_data), __r_abs);
2198  }
2199 
2200  // _S_floor {{{3
2201  template <typename _Tp, size_t _Np>
2202  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2203  _S_floor(_SimdWrapper<_Tp, _Np> __x)
2204  {
2205  const auto __y = _SuperImpl::_S_trunc(__x)._M_data;
2206  const auto __negative_input
2207  = __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0));
2208  const auto __mask
2209  = __andnot(__vector_bitcast<_Tp>(__y == __x._M_data), __negative_input);
2210  return __or(__andnot(__mask, __y),
2211  __and(__mask, __y - __vector_broadcast<_Np, _Tp>(1)));
2212  }
2213 
2214  // _S_ceil {{{3
2215  template <typename _Tp, size_t _Np>
2216  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2217  _S_ceil(_SimdWrapper<_Tp, _Np> __x)
2218  {
2219  const auto __y = _SuperImpl::_S_trunc(__x)._M_data;
2220  const auto __negative_input
2221  = __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0));
2222  const auto __inv_mask
2223  = __or(__vector_bitcast<_Tp>(__y == __x._M_data), __negative_input);
2224  return __or(__and(__inv_mask, __y),
2225  __andnot(__inv_mask, __y + __vector_broadcast<_Np, _Tp>(1)));
2226  }
2227 
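  // _S_floor and _S_ceil above derive from truncation: floor subtracts one
  // where the input was negative and truncation changed the value, ceil adds
  // one where the input was non-negative and truncation changed the value;
  // the masks select between the two candidates branch-free. Scalar sketch
  // (illustrative only):
  static float
  _S_floor_sketch(float __x)
  {
    const float __t = __builtin_truncf(__x);   // stands in for _S_trunc
    return (__x < 0 && __t != __x) ? __t - 1 : __t;
  }

  static float
  _S_ceil_sketch(float __x)
  {
    const float __t = __builtin_truncf(__x);
    return (__x > 0 && __t != __x) ? __t + 1 : __t;
  }
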
2228  // _S_isnan {{{3
2229  template <typename _Tp, size_t _Np>
2230  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2231  _S_isnan([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2232  {
2233  #if __FINITE_MATH_ONLY__
2234  return {}; // false
2235  #elif !defined __SUPPORT_SNAN__
2236  return ~(__x._M_data == __x._M_data);
2237  #elif defined __STDC_IEC_559__
2238  using _Ip = __int_for_sizeof_t<_Tp>;
2239  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2240  const auto __infn
2241  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__infinity_v<_Tp>));
2242  return __infn < __absn;
2243  #else
2244  #error "Not implemented: how to support SNaN but non-IEC559 floating-point?"
2245  #endif
2246  }
2247 
2248  // _S_isfinite {{{3
2249  template <typename _Tp, size_t _Np>
2250  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2251  _S_isfinite([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2252  {
2253  #if __FINITE_MATH_ONLY__
2254  using _UV = typename _MaskMember<_Tp>::_BuiltinType;
2255  _GLIBCXX_SIMD_USE_CONSTEXPR _UV __alltrue = ~_UV();
2256  return __alltrue;
2257  #else
2258  // if all exponent bits are set, __x is either inf or NaN
2259  using _Ip = __int_for_sizeof_t<_Tp>;
2260  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2261  const auto __maxn
2262  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__finite_max_v<_Tp>));
2263  return __absn <= __maxn;
2264  #endif
2265  }
2266 
2267  // _S_isunordered {{{3
2268  template <typename _Tp, size_t _Np>
2269  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2270  _S_isunordered(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
2271  {
2272  return __or(_S_isnan(__x), _S_isnan(__y));
2273  }
2274 
2275  // _S_signbit {{{3
2276  template <typename _Tp, size_t _Np>
2277  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2278  _S_signbit(_SimdWrapper<_Tp, _Np> __x)
2279  {
2280  using _Ip = __int_for_sizeof_t<_Tp>;
2281  return __vector_bitcast<_Ip>(__x) < 0;
2282  // Arithmetic right shift (SRA) would also work (instead of compare), but
2283  // 64-bit SRA isn't available on x86 before AVX512. And in general,
2284  // compares are more likely to be efficient than SRA.
2285  }
2286 
2287  // _S_isinf {{{3
2288  template <typename _Tp, size_t _Np>
2289  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2290  _S_isinf([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2291  {
2292  #if __FINITE_MATH_ONLY__
2293  return {}; // false
2294  #else
2295  return _SuperImpl::template _S_equal_to<_Tp, _Np>(_SuperImpl::_S_abs(__x),
2296  __vector_broadcast<_Np>(
2297  __infinity_v<_Tp>));
2298  // alternative:
2299  // compare to inf using the corresponding integer type
2300  /*
2301  return
2302  __vector_bitcast<_Tp>(__vector_bitcast<__int_for_sizeof_t<_Tp>>(
2303  _S_abs(__x)._M_data)
2304  ==
2305  __vector_bitcast<__int_for_sizeof_t<_Tp>>(__vector_broadcast<_Np>(
2306  __infinity_v<_Tp>)));
2307  */
2308  #endif
2309  }
2310 
2311  // _S_isnormal {{{3
2312  template <typename _Tp, size_t _Np>
2313  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2314  _S_isnormal(_SimdWrapper<_Tp, _Np> __x)
2315  {
2316  using _Ip = __int_for_sizeof_t<_Tp>;
2317  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2318  const auto __minn
2319  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__norm_min_v<_Tp>));
2320  #if __FINITE_MATH_ONLY__
2321  return __absn >= __minn;
2322  #else
2323  const auto __maxn
2324  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__finite_max_v<_Tp>));
2325  return __minn <= __absn && __absn <= __maxn;
2326  #endif
2327  }
2328 
2329  // _S_fpclassify {{{3
2330  template <typename _Tp, size_t _Np>
2331  _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
2332  _S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
2333  {
2334  using _I = __int_for_sizeof_t<_Tp>;
2335  const auto __xn
2336  = __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
2337  constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
2338  _GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
2339  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
2340  _GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
2341  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
2342 
2343  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
2344  = __vector_broadcast<_NI, _I>(FP_NORMAL);
2345  #if !__FINITE_MATH_ONLY__
2346  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
2347  = __vector_broadcast<_NI, _I>(FP_NAN);
2348  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
2349  = __vector_broadcast<_NI, _I>(FP_INFINITE);
2350  #endif
2351  #ifndef __FAST_MATH__
2352  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
2353  = __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
2354  #endif
2355  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
2356  = __vector_broadcast<_NI, _I>(FP_ZERO);
2357 
2358  __vector_type_t<_I, _NI>
2359  __tmp = __xn < __minn
2360  #ifdef __FAST_MATH__
2361  ? __fp_zero
2362  #else
2363  ? (__xn == 0 ? __fp_zero : __fp_subnormal)
2364  #endif
2365  #if __FINITE_MATH_ONLY__
2366  : __fp_normal;
2367  #else
2368  : (__xn < __infn ? __fp_normal
2369  : (__xn == __infn ? __fp_infinite : __fp_nan));
2370  #endif
2371 
2372  if constexpr (sizeof(_I) == sizeof(int))
2373  {
2374  using _FixedInt = __fixed_size_storage_t<int, _Np>;
2375  const auto __as_int = __vector_bitcast<int, _Np>(__tmp);
2376  if constexpr (_FixedInt::_S_tuple_size == 1)
2377  return {__as_int};
2378  else if constexpr (_FixedInt::_S_tuple_size == 2
2379  && is_same_v<
2380  typename _FixedInt::_SecondType::_FirstAbi,
2381  simd_abi::scalar>)
2382  return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
2383  else if constexpr (_FixedInt::_S_tuple_size == 2)
2384  return {__extract<0, 2>(__as_int),
2385  __auto_bitcast(__extract<1, 2>(__as_int))};
2386  else
2387  __assert_unreachable<_Tp>();
2388  }
2389  else if constexpr (_Np == 2 && sizeof(_I) == 8
2390  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
2391  {
2392  const auto __aslong = __vector_bitcast<_LLong>(__tmp);
2393  return {int(__aslong[0]), {int(__aslong[1])}};
2394  }
2395  #if _GLIBCXX_SIMD_X86INTRIN
2396  else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
2397  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2398  return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
2399  __to_intrin(__hi128(__tmp)))};
2400  else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
2401  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2402  return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
2403  #endif // _GLIBCXX_SIMD_X86INTRIN
2404  else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2405  return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
2406  [](auto... __l) {
2407  return __make_wrapper<int>(__l...);
2408  })};
2409  else
2410  __assert_unreachable<_Tp>();
2411  }
2412 
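  // The branch-free classification above selects among the same FP_* values
  // that the scalar decision chain below produces; the remainder of the
  // function only repacks the result into __fixed_size_storage_t<int, _Np>.
  // Scalar sketch (float, assuming neither __FAST_MATH__ nor
  // __FINITE_MATH_ONLY__ is in effect; illustrative only):
  static int
  _S_fpclassify_sketch(float __x)
  {
    const float __a = __builtin_fabsf(__x);
    if (__a < __FLT_MIN__)                     // below the normal range
      return __a == 0 ? FP_ZERO : FP_SUBNORMAL;
    if (__a < __builtin_inff())
      return FP_NORMAL;
    return __a == __builtin_inff() ? FP_INFINITE : FP_NAN;
  }
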
2413  // _S_increment & _S_decrement{{{2
2414  template <typename _Tp, size_t _Np>
2415  _GLIBCXX_SIMD_INTRINSIC static void
2416  _S_increment(_SimdWrapper<_Tp, _Np>& __x)
2417  { __x = __x._M_data + 1; }
2418 
2419  template <typename _Tp, size_t _Np>
2420  _GLIBCXX_SIMD_INTRINSIC static void
2421  _S_decrement(_SimdWrapper<_Tp, _Np>& __x)
2422  { __x = __x._M_data - 1; }
2423 
2424  // smart_reference access {{{2
2425  template <typename _Tp, size_t _Np, typename _Up>
2426  _GLIBCXX_SIMD_INTRINSIC constexpr static void
2427  _S_set(_SimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept
2428  { __v._M_set(__i, static_cast<_Up&&>(__x)); }
2429 
2430  // _S_masked_assign{{{2
2431  template <typename _Tp, typename _K, size_t _Np>
2432  _GLIBCXX_SIMD_INTRINSIC static void
2433  _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs,
2434  __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs)
2435  {
2436  if (__k._M_is_constprop_none_of())
2437  return;
2438  else if (__k._M_is_constprop_all_of())
2439  __lhs = __rhs;
2440  else
2441  __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs);
2442  }
2443 
2444  template <typename _Tp, typename _K, size_t _Np>
2445  _GLIBCXX_SIMD_INTRINSIC static void
2446  _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs,
2447  __type_identity_t<_Tp> __rhs)
2448  {
2449  if (__k._M_is_constprop_none_of())
2450  return;
2451  else if (__k._M_is_constprop_all_of())
2452  __lhs = __vector_broadcast<_Np>(__rhs);
2453  else if (__builtin_constant_p(__rhs) && __rhs == 0)
2454  {
2455  if constexpr (!is_same_v<bool, _K>)
2456  // the __andnot optimization only makes sense if __k._M_data is a
2457  // vector register
2458  __lhs._M_data
2459  = __andnot(__vector_bitcast<_Tp>(__k), __lhs._M_data);
2460  else
2461  // for AVX512/__mmask, a _mm512_maskz_mov is best
2462  __lhs
2463  = _CommonImpl::_S_blend(__k, __lhs, _SimdWrapper<_Tp, _Np>());
2464  }
2465  else
2466  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2467  _SimdWrapper<_Tp, _Np>(
2468  __vector_broadcast<_Np>(__rhs)));
2469  }
2470 
2471  // _S_masked_cassign {{{2
2472  template <typename _Op, typename _Tp, typename _K, size_t _Np>
2473  _GLIBCXX_SIMD_INTRINSIC static void
2474  _S_masked_cassign(const _SimdWrapper<_K, _Np> __k,
2475  _SimdWrapper<_Tp, _Np>& __lhs,
2476  const __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs,
2477  _Op __op)
2478  {
2479  if (__k._M_is_constprop_none_of())
2480  return;
2481  else if (__k._M_is_constprop_all_of())
2482  __lhs = __op(_SuperImpl{}, __lhs, __rhs);
2483  else
2484  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2485  __op(_SuperImpl{}, __lhs, __rhs));
2486  }
2487 
2488  template <typename _Op, typename _Tp, typename _K, size_t _Np>
2489  _GLIBCXX_SIMD_INTRINSIC static void
2490  _S_masked_cassign(const _SimdWrapper<_K, _Np> __k,
2491  _SimdWrapper<_Tp, _Np>& __lhs,
2492  const __type_identity_t<_Tp> __rhs, _Op __op)
2493  { _S_masked_cassign(__k, __lhs, __vector_broadcast<_Np>(__rhs), __op); }
2494 
2495  // _S_masked_unary {{{2
2496  template <template <typename> class _Op, typename _Tp, typename _K,
2497  size_t _Np>
2498  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2499  _S_masked_unary(const _SimdWrapper<_K, _Np> __k,
2500  const _SimdWrapper<_Tp, _Np> __v)
2501  {
2502  if (__k._M_is_constprop_none_of())
2503  return __v;
2504  auto __vv = _M_make_simd(__v);
2505  _Op<decltype(__vv)> __op;
2506  if (__k._M_is_constprop_all_of())
2507  return __data(__op(__vv));
2508  else
2509  return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv)));
2510  }
2511 
2512  //}}}2
2513  };
2514 
2515 // _MaskImplBuiltinMixin {{{1
2516 struct _MaskImplBuiltinMixin
2517 {
2518  template <typename _Tp>
2519  using _TypeTag = _Tp*;
2520 
2521  // _S_to_maskvector {{{
2522  template <typename _Up, size_t _ToN = 1>
2523  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2524  _S_to_maskvector(bool __x)
2525  {
2526  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2527  return __x ? __vector_type_t<_Up, _ToN>{~_Up()}
2528  : __vector_type_t<_Up, _ToN>{};
2529  }
2530 
2531  template <typename _Up, size_t _UpN = 0, size_t _Np, bool _Sanitized,
2532  size_t _ToN = _UpN == 0 ? _Np : _UpN>
2533  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2534  _S_to_maskvector(_BitMask<_Np, _Sanitized> __x)
2535  {
2536  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2537  return __generate_vector<__vector_type_t<_Up, _ToN>>([&](
2538  auto __i) constexpr {
2539  if constexpr (__i < _Np)
2540  return __x[__i] ? ~_Up() : _Up();
2541  else
2542  return _Up();
2543  });
2544  }
2545 
2546  template <typename _Up, size_t _UpN = 0, typename _Tp, size_t _Np,
2547  size_t _ToN = _UpN == 0 ? _Np : _UpN>
2548  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2549  _S_to_maskvector(_SimdWrapper<_Tp, _Np> __x)
2550  {
2551  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2552  using _TW = _SimdWrapper<_Tp, _Np>;
2553  using _UW = _SimdWrapper<_Up, _ToN>;
2554  if constexpr (sizeof(_Up) == sizeof(_Tp) && sizeof(_TW) == sizeof(_UW))
2555  return __wrapper_bitcast<_Up, _ToN>(__x);
2556  else if constexpr (is_same_v<_Tp, bool>) // bits -> vector
2557  return _S_to_maskvector<_Up, _ToN>(_BitMask<_Np>(__x._M_data));
2558  else
2559  { // vector -> vector
2560  /*
2561  [[maybe_unused]] const auto __y = __vector_bitcast<_Up>(__x._M_data);
2562  if constexpr (sizeof(_Tp) == 8 && sizeof(_Up) == 4 && sizeof(__y) ==
2563  16) return __vector_permute<1, 3, -1, -1>(__y); else if constexpr
2564  (sizeof(_Tp) == 4 && sizeof(_Up) == 2
2565  && sizeof(__y) == 16)
2566  return __vector_permute<1, 3, 5, 7, -1, -1, -1, -1>(__y);
2567  else if constexpr (sizeof(_Tp) == 8 && sizeof(_Up) == 2
2568  && sizeof(__y) == 16)
2569  return __vector_permute<3, 7, -1, -1, -1, -1, -1, -1>(__y);
2570  else if constexpr (sizeof(_Tp) == 2 && sizeof(_Up) == 1
2571  && sizeof(__y) == 16)
2572  return __vector_permute<1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1,
2573  -1, -1, -1, -1>(__y); else if constexpr (sizeof(_Tp) == 4 &&
2574  sizeof(_Up) == 1
2575  && sizeof(__y) == 16)
2576  return __vector_permute<3, 7, 11, 15, -1, -1, -1, -1, -1, -1, -1,
2577  -1, -1, -1, -1, -1>(__y); else if constexpr (sizeof(_Tp) == 8 &&
2578  sizeof(_Up) == 1
2579  && sizeof(__y) == 16)
2580  return __vector_permute<7, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
2581  -1, -1, -1, -1, -1>(__y); else
2582  */
2583  {
2584  return __generate_vector<__vector_type_t<_Up, _ToN>>([&](
2585  auto __i) constexpr {
2586  if constexpr (__i < _Np)
2587  return _Up(__x[__i.value]);
2588  else
2589  return _Up();
2590  });
2591  }
2592  }
2593  }
2594 
2595  // }}}
2596  // _S_to_bits {{{
2597  template <typename _Tp, size_t _Np>
2598  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
2599  _S_to_bits(_SimdWrapper<_Tp, _Np> __x)
2600  {
2601  static_assert(!is_same_v<_Tp, bool>);
2602  static_assert(_Np <= __CHAR_BIT__ * sizeof(_ULLong));
2603  using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2604  const auto __bools
2605  = __vector_bitcast<_Up>(__x) >> (sizeof(_Up) * __CHAR_BIT__ - 1);
2606  _ULLong __r = 0;
2607  __execute_n_times<_Np>(
2608  [&](auto __i) { __r |= _ULLong(__bools[__i.value]) << __i; });
2609  return __r;
2610  }
2611 
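  // _S_to_bits above relies on every true lane being all-ones: shifting the
  // unsigned element right by (element bits - 1) leaves exactly the sign bit
  // in bit 0, and those bits are OR-ed into an unsigned long long at their
  // lane position. The same loop over a plain array, as an illustrative
  // sketch (int lanes holding 0 or -1; not used by the implementation):
  template <size_t _Np>
    static constexpr unsigned long long
    _S_to_bits_sketch(const int (&__lanes)[_Np])
    {
      unsigned long long __r = 0;
      for (size_t __i = 0; __i < _Np; ++__i)
	{
	  const unsigned __msb = static_cast<unsigned>(__lanes[__i])
				   >> (sizeof(int) * __CHAR_BIT__ - 1);
	  __r |= static_cast<unsigned long long>(__msb) << __i;
	}
      return __r;
    }
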
2612  // }}}
2613 };
2614 
2615 // _MaskImplBuiltin {{{1
2616 template <typename _Abi>
2617  struct _MaskImplBuiltin : _MaskImplBuiltinMixin
2618  {
2619  using _MaskImplBuiltinMixin::_S_to_bits;
2620  using _MaskImplBuiltinMixin::_S_to_maskvector;
2621 
2622  // member types {{{
2623  template <typename _Tp>
2624  using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;
2625 
2626  template <typename _Tp>
2627  using _MaskMember = typename _Abi::template _MaskMember<_Tp>;
2628 
2629  using _SuperImpl = typename _Abi::_MaskImpl;
2630  using _CommonImpl = typename _Abi::_CommonImpl;
2631 
2632  template <typename _Tp>
2633  static constexpr size_t _S_size = simd_size_v<_Tp, _Abi>;
2634 
2635  // }}}
2636  // _S_broadcast {{{
2637  template <typename _Tp>
2638  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
2639  _S_broadcast(bool __x)
2640  {
2641  return __x ? _Abi::template _S_implicit_mask<_Tp>()
2642  : _MaskMember<_Tp>();
2643  }
2644 
2645  // }}}
2646  // _S_load {{{
2647  template <typename _Tp>
2648  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
2649  _S_load(const bool* __mem)
2650  {
2651  using _I = __int_for_sizeof_t<_Tp>;
2652  if constexpr (sizeof(_Tp) == sizeof(bool))
2653  {
2654  const auto __bools
2655  = _CommonImpl::template _S_load<_I, _S_size<_Tp>>(__mem);
2656  // bool is {0, 1}, everything else is UB
2657  return __bools > 0;
2658  }
2659  else
2660  return __generate_vector<_I, _S_size<_Tp>>([&](auto __i) constexpr {
2661  return __mem[__i] ? ~_I() : _I();
2662  });
2663  }
2664 
2665  // }}}
2666  // _S_convert {{{
2667  template <typename _Tp, size_t _Np, bool _Sanitized>
2668  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2669  _S_convert(_BitMask<_Np, _Sanitized> __x)
2670  {
2671  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2672  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_to_bits());
2673  else
2674  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2675  _S_size<_Tp>>(
2676  __x._M_sanitized());
2677  }
2678 
2679  template <typename _Tp, size_t _Np>
2680  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2681  _S_convert(_SimdWrapper<bool, _Np> __x)
2682  {
2683  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2684  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_data);
2685  else
2686  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2687  _S_size<_Tp>>(
2688  _BitMask<_Np>(__x._M_data)._M_sanitized());
2689  }
2690 
2691  template <typename _Tp, typename _Up, size_t _Np>
2692  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2693  _S_convert(_SimdWrapper<_Up, _Np> __x)
2694  {
2695  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2696  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(
2697  _SuperImpl::_S_to_bits(__x));
2698  else
2699  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2700  _S_size<_Tp>>(__x);
2701  }
2702 
2703  template <typename _Tp, typename _Up, typename _UAbi>
2704  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2705  _S_convert(simd_mask<_Up, _UAbi> __x)
2706  {
2707  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2708  {
2709  using _R = _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>;
2710  if constexpr (__is_builtin_bitmask_abi<_UAbi>()) // bits -> bits
2711  return _R(__data(__x));
2712  else if constexpr (__is_scalar_abi<_UAbi>()) // bool -> bits
2713  return _R(__data(__x));
2714  else if constexpr (__is_fixed_size_abi_v<_UAbi>) // bitset -> bits
2715  return _R(__data(__x)._M_to_bits());
2716  else // vector -> bits
2717  return _R(_UAbi::_MaskImpl::_S_to_bits(__data(__x))._M_to_bits());
2718  }
2719  else
2720  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2721  _S_size<_Tp>>(
2722  __data(__x));
2723  }
2724 
2725  // }}}
2726  // _S_masked_load {{{2
2727  template <typename _Tp, size_t _Np>
2728  static inline _SimdWrapper<_Tp, _Np>
2729  _S_masked_load(_SimdWrapper<_Tp, _Np> __merge,
2730  _SimdWrapper<_Tp, _Np> __mask, const bool* __mem) noexcept
2731  {
2732  // AVX(2) has 32/64 bit maskload, but nothing at 8 bit granularity
2733  auto __tmp = __wrapper_bitcast<__int_for_sizeof_t<_Tp>>(__merge);
2734  _BitOps::_S_bit_iteration(_SuperImpl::_S_to_bits(__mask),
2735  [&](auto __i) {
2736  __tmp._M_set(__i, -__mem[__i]);
2737  });
2738  __merge = __wrapper_bitcast<_Tp>(__tmp);
2739  return __merge;
2740  }
2741 
2742  // _S_store {{{2
2743  template <typename _Tp, size_t _Np>
2744  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __v,
2745  bool* __mem) noexcept
2746  {
2747  __execute_n_times<_Np>([&](auto __i) constexpr {
2748  __mem[__i] = __v[__i];
2749  });
2750  }
2751 
2752  // _S_masked_store {{{2
2753  template <typename _Tp, size_t _Np>
2754  static inline void
2755  _S_masked_store(const _SimdWrapper<_Tp, _Np> __v, bool* __mem,
2756  const _SimdWrapper<_Tp, _Np> __k) noexcept
2757  {
2758  _BitOps::_S_bit_iteration(
2759  _SuperImpl::_S_to_bits(__k), [&](auto __i) constexpr {
2760  __mem[__i] = __v[__i];
2761  });
2762  }
2763 
2764  // _S_from_bitmask{{{2
2765  template <size_t _Np, typename _Tp>
2766  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2767  _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>)
2768  {
2769  return _SuperImpl::template _S_to_maskvector<_Tp, _S_size<_Tp>>(__bits);
2770  }
2771 
2772  // logical and bitwise operators {{{2
2773  template <typename _Tp, size_t _Np>
2774  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2775  _S_logical_and(const _SimdWrapper<_Tp, _Np>& __x,
2776  const _SimdWrapper<_Tp, _Np>& __y)
2777  { return __and(__x._M_data, __y._M_data); }
2778 
2779  template <typename _Tp, size_t _Np>
2780  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2781  _S_logical_or(const _SimdWrapper<_Tp, _Np>& __x,
2782  const _SimdWrapper<_Tp, _Np>& __y)
2783  { return __or(__x._M_data, __y._M_data); }
2784 
2785  template <typename _Tp, size_t _Np>
2786  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2787  _S_bit_not(const _SimdWrapper<_Tp, _Np>& __x)
2788  {
2789  if constexpr (_Abi::template _S_is_partial<_Tp>)
2790  return __andnot(__x, __wrapper_bitcast<_Tp>(
2791  _Abi::template _S_implicit_mask<_Tp>()));
2792  else
2793  return __not(__x._M_data);
2794  }
2795 
2796  template <typename _Tp, size_t _Np>
2797  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2798  _S_bit_and(const _SimdWrapper<_Tp, _Np>& __x,
2799  const _SimdWrapper<_Tp, _Np>& __y)
2800  { return __and(__x._M_data, __y._M_data); }
2801 
2802  template <typename _Tp, size_t _Np>
2803  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2804  _S_bit_or(const _SimdWrapper<_Tp, _Np>& __x,
2805  const _SimdWrapper<_Tp, _Np>& __y)
2806  { return __or(__x._M_data, __y._M_data); }
2807 
2808  template <typename _Tp, size_t _Np>
2809  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2810  _S_bit_xor(const _SimdWrapper<_Tp, _Np>& __x,
2811  const _SimdWrapper<_Tp, _Np>& __y)
2812  { return __xor(__x._M_data, __y._M_data); }
2813 
2814  // smart_reference access {{{2
2815  template <typename _Tp, size_t _Np>
2816  static constexpr void _S_set(_SimdWrapper<_Tp, _Np>& __k, int __i,
2817  bool __x) noexcept
2818  {
2819  if constexpr (is_same_v<_Tp, bool>)
2820  __k._M_set(__i, __x);
2821  else
2822  {
2823  static_assert(is_same_v<_Tp, __int_for_sizeof_t<_Tp>>);
2824  if (__builtin_is_constant_evaluated())
2825  {
2826  __k = __generate_from_n_evaluations<_Np,
2827  __vector_type_t<_Tp, _Np>>(
2828  [&](auto __j) {
2829  if (__i == __j)
2830  return _Tp(-__x);
2831  else
2832  return __k[+__j];
2833  });
2834  }
2835  else
2836  __k._M_data[__i] = -__x;
2837  }
2838  }
2839 
2840  // _S_masked_assign{{{2
2841  template <typename _Tp, size_t _Np>
2842  _GLIBCXX_SIMD_INTRINSIC static void
2843  _S_masked_assign(_SimdWrapper<_Tp, _Np> __k,
2844  _SimdWrapper<_Tp, _Np>& __lhs,
2845  __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs)
2846  { __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs); }
2847 
2848  template <typename _Tp, size_t _Np>
2849  _GLIBCXX_SIMD_INTRINSIC static void
2850  _S_masked_assign(_SimdWrapper<_Tp, _Np> __k,
2851  _SimdWrapper<_Tp, _Np>& __lhs, bool __rhs)
2852  {
2853  if (__builtin_constant_p(__rhs))
2854  {
2855  if (__rhs == false)
2856  __lhs = __andnot(__k, __lhs);
2857  else
2858  __lhs = __or(__k, __lhs);
2859  return;
2860  }
2861  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2862  __data(simd_mask<_Tp, _Abi>(__rhs)));
2863  }
2864 
2865  //}}}2
2866  // _S_all_of {{{
2867  template <typename _Tp>
2868  _GLIBCXX_SIMD_INTRINSIC static bool
2869  _S_all_of(simd_mask<_Tp, _Abi> __k)
2870  {
2871  return __call_with_subscripts(
2872  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2873  [](const auto... __ent) constexpr { return (... && !(__ent == 0)); });
2874  }
2875 
2876  // }}}
2877  // _S_any_of {{{
2878  template <typename _Tp>
2879  _GLIBCXX_SIMD_INTRINSIC static bool
2880  _S_any_of(simd_mask<_Tp, _Abi> __k)
2881  {
2882  return __call_with_subscripts(
2883  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2884  [](const auto... __ent) constexpr { return (... || !(__ent == 0)); });
2885  }
2886 
2887  // }}}
2888  // _S_none_of {{{
2889  template <typename _Tp>
2890  _GLIBCXX_SIMD_INTRINSIC static bool
2891  _S_none_of(simd_mask<_Tp, _Abi> __k)
2892  {
2893  return __call_with_subscripts(
2894  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2895  [](const auto... __ent) constexpr { return (... && (__ent == 0)); });
2896  }
2897 
2898  // }}}
2899  // _S_some_of {{{
2900  template <typename _Tp>
2901  _GLIBCXX_SIMD_INTRINSIC static bool
2902  _S_some_of(simd_mask<_Tp, _Abi> __k)
2903  {
2904  const int __n_true = _SuperImpl::_S_popcount(__k);
2905  return __n_true > 0 && __n_true < int(_S_size<_Tp>);
2906  }
2907 
2908  // }}}
2909  // _S_popcount {{{
2910  template <typename _Tp>
2911  _GLIBCXX_SIMD_INTRINSIC static int
2912  _S_popcount(simd_mask<_Tp, _Abi> __k)
2913  {
2914  using _I = __int_for_sizeof_t<_Tp>;
2915  if constexpr (is_default_constructible_v<simd<_I, _Abi>>)
2916  return -reduce(
2917  simd<_I, _Abi>(__private_init, __wrapper_bitcast<_I>(__data(__k))));
2918  else
2919  return -reduce(__bit_cast<rebind_simd_t<_I, simd<_Tp, _Abi>>>(
2920  simd<_Tp, _Abi>(__private_init, __data(__k))));
2921  }
2922 
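  // Each true lane of a vector mask is ~0, i.e. -1 in the signed-integer
  // view, so reducing (summing) that view yields minus the number of true
  // lanes and the leading negation above restores the count. E.g. the mask
  // {true, false, true, true} sums to -3:
  static_assert(-((-1) + 0 + (-1) + (-1)) == 3,
		"popcount of {true, false, true, true}");
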
2923  // }}}
2924  // _S_find_first_set {{{
2925  template <typename _Tp>
2926  _GLIBCXX_SIMD_INTRINSIC static int
2927  _S_find_first_set(simd_mask<_Tp, _Abi> __k)
2928  {
2929  return std::__countr_zero(
2930  _SuperImpl::_S_to_bits(__data(__k))._M_to_bits());
2931  }
2932 
2933  // }}}
2934  // _S_find_last_set {{{
2935  template <typename _Tp>
2936  _GLIBCXX_SIMD_INTRINSIC static int
2937  _S_find_last_set(simd_mask<_Tp, _Abi> __k)
2938  {
2939  return std::__bit_width(
2940  _SuperImpl::_S_to_bits(__data(__k))._M_to_bits()) - 1;
2941  }
2942 
2943  // }}}
2944  };
2945 
2946 //}}}1
2947 _GLIBCXX_SIMD_END_NAMESPACE
2948 #endif // __cplusplus >= 201703L
2949 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
2950 
2951 // vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=80