1 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
2 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
10 #include <pmmintrin.h>
/*
 * SSE3 kernel: processes the input vectors one 128-bit register (four floats)
 * at a time using aligned loads and stores.
 *
 * Assuming each lv_32fc_t is an interleaved (real, imag) pair of 32-bit
 * floats (TODO confirm against the lv_32fc_t definition), every loop
 * iteration handles two complex points and computes a * conj(b) for each.
 *
 * cVector    - output buffer.
 * aVector    - first input buffer.
 * bVector    - second input buffer; sign-flipped in its odd float lanes
 *              before the multiply (conjugation under the assumption above).
 * num_points - number of points; num_points / 2 iterations run in the
 *              vector loop and an odd count is handled after it.
 *
 * NOTE(review): _mm_load_ps/_mm_store_ps require 16-byte aligned addresses.
 * NOTE(review): `a`, `b` and `c` are used below but their declarations and
 * any per-iteration pointer advance are not visible in this excerpt --
 * presumably they are cursors over aVector, bVector and cVector; confirm
 * against the full file.
 */
18 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
19 unsigned int number = 0;
/* Each 128-bit register holds four floats, so the loop count is halved. */
20 const unsigned int halfPoints = num_points / 2;
22 __m128 x, y, yl, yh, z, tmp1, tmp2;
/* Only the sign bit is set in lanes 1 and 3; XOR-ing with this mask negates
   those lanes and leaves lanes 0 and 2 untouched. */
27 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
29 for(;number < halfPoints; number++){
/* Aligned load of four floats from each input. */
31 x = _mm_load_ps((
float*)a);
32 y = _mm_load_ps((
float*)b);
/* Negate lanes 1 and 3 of y. */
34 y = _mm_xor_ps(y, conjugator);
/* yl duplicates the even lanes of y: (y0, y0, y2, y2). */
36 yl = _mm_moveldup_ps(y);
/* yh duplicates the odd lanes of y: (y1, y1, y3, y3). */
37 yh = _mm_movehdup_ps(y);
/* tmp1 = (x0*y0, x1*y0, x2*y2, x3*y2). */
39 tmp1 = _mm_mul_ps(x,yl);
/* 0xB1 swaps the two floats inside each adjacent pair: (x1, x0, x3, x2). */
41 x = _mm_shuffle_ps(x,x,0xB1);
/* tmp2 = (x1*y1, x0*y1, x3*y3, x2*y3), with y1/y3 already negated. */
43 tmp2 = _mm_mul_ps(x,yh);
/* addsub subtracts in even lanes and adds in odd lanes, combining the
   partial products into the final result for each pair. */
45 z = _mm_addsub_ps(tmp1,tmp2);
/* Aligned store of the four result floats. */
47 _mm_store_ps((
float*)c,z);
/* Odd num_points: one point is left over after the vector loop; the body of
   this branch is not visible in this excerpt. */
54 if((num_points % 2) != 0) {
60 #ifdef LV_HAVE_GENERIC
68 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
72 unsigned int number = 0;
74 for(number = 0; number < num_points; number++){
75 *cPtr++ = (*aPtr++) *
lv_conj(*bPtr++);