/* GNU Radio 3.4.0 C++ API */
#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H

#include <inttypes.h>
#include <math.h>
#include <stdio.h>

/*!
  \brief Rounds a scaled sample to the nearest integer (current rounding mode) and saturates it to the int16_t range.

  Shared by every scalar code path in this header so that they produce the
  same result as the SSE2 vector body, which rounds with _mm_cvtps_epi32 and
  saturates with _mm_packs_epi32. A plain (int16_t) cast would truncate toward
  zero instead, and is undefined behaviour for out-of-range values.

  \param value The already scaled floating point sample
  \return The sample as a 16 bit integer, clamped to [-32768, 32767]; NaN maps to -32768
*/
static inline int16_t lv_interleave_16ic_round_saturate(const float value){
  /* Negated comparison on purpose: NaN fails "value > min" and therefore
     lands on the lower rail, the same value the clamped SSE2 path yields. */
  if(!(value > -32768.0f)){
    return (int16_t)(-32768);
  }
  if(value > 32767.0f){
    return (int16_t)32767;
  }
  /* In range here, so the rounded value is representable whatever the rounding mode. */
  return (int16_t)rintf(value);
}

#if LV_HAVE_SSE2
#include <emmintrin.h>
/*!
  \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.

  Four I/Q pairs are processed per iteration with aligned loads and an aligned
  store, so all three buffers must be 16 byte aligned. Leftover points
  (num_points % 4) are handled one at a time with identical rounding and
  saturation.

  \param complexVector The complex output vector
  \param iBuffer The I buffer data to be interleaved
  \param qBuffer The Q buffer data to be interleaved
  \param scalar The scaling value being multiplied against each data point
  \param num_points The number of complex data values to be interleaved
*/
static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const float* iBufferPtr = iBuffer;
  const float* qBufferPtr = qBuffer;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vMin = _mm_set_ps1(-32768.0f);
  const __m128 vMax = _mm_set_ps1(32767.0f);

  const unsigned int quarterPoints = num_points / 4;

  __m128 iValue, qValue, cplxValue1, cplxValue2;
  __m128i intValue1, intValue2;

  int16_t* complexVectorPtr = (int16_t*)complexVector;

  for(; number < quarterPoints; number++){
    iValue = _mm_load_ps(iBufferPtr);
    qValue = _mm_load_ps(qBufferPtr);

    // Interleaves the lower two values in the i and q variables into one buffer
    cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
    cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);

    // Interleaves the upper two values in the i and q variables into one buffer
    cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
    cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);

    // Clamp while still in float: _mm_cvtps_epi32 returns 0x80000000 for
    // anything outside the int32 range, which would pack to -32768 even for
    // large positive inputs. Operand order matters: max(x, vMin) returns vMin
    // when x is NaN, matching the scalar helper.
    cplxValue1 = _mm_min_ps(_mm_max_ps(cplxValue1, vMin), vMax);
    cplxValue2 = _mm_min_ps(_mm_max_ps(cplxValue2, vMin), vMax);

    // Round to integer (MXCSR rounding mode), then narrow 32 -> 16 bit
    intValue1 = _mm_cvtps_epi32(cplxValue1);
    intValue2 = _mm_cvtps_epi32(cplxValue2);

    intValue1 = _mm_packs_epi32(intValue1, intValue2);

    _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
    complexVectorPtr += 8;

    iBufferPtr += 4;
    qBufferPtr += 4;
  }

  // Scalar tail for the points that did not fill a whole vector
  number = quarterPoints * 4;
  complexVectorPtr = (int16_t*)(&complexVector[number]);
  for(; number < num_points; number++){
    *complexVectorPtr++ = lv_interleave_16ic_round_saturate(*iBufferPtr++ * scalar);
    *complexVectorPtr++ = lv_interleave_16ic_round_saturate(*qBufferPtr++ * scalar);
  }

}
#endif /* LV_HAVE_SSE2 */

#if LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.

  The interleave and scaling are done with SSE; the conversion to 16 bit is
  done per element through a small aligned scratch buffer because SSE alone
  has no packed float to int16 conversion. The input buffers are read with
  aligned loads and must be 16 byte aligned.

  \param complexVector The complex output vector
  \param iBuffer The I buffer data to be interleaved
  \param qBuffer The Q buffer data to be interleaved
  \param scalar The scaling value being multiplied against each data point
  \param num_points The number of complex data values to be interleaved
*/
static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  unsigned int lane = 0;
  const float* iBufferPtr = iBuffer;
  const float* qBufferPtr = qBuffer;

  const __m128 vScalar = _mm_set_ps1(scalar);

  const unsigned int quarterPoints = num_points / 4;

  __m128 iValue, qValue, cplxValue;

  int16_t* complexVectorPtr = (int16_t*)complexVector;

  // _mm_store_ps needs a 16 byte (128 bit) aligned destination
  float floatBuffer[4] __attribute__((aligned(16)));

  for(; number < quarterPoints; number++){
    iValue = _mm_load_ps(iBufferPtr);
    qValue = _mm_load_ps(qBufferPtr);

    // Interleaves the lower two values in the i and q variables into one buffer
    cplxValue = _mm_unpacklo_ps(iValue, qValue);
    cplxValue = _mm_mul_ps(cplxValue, vScalar);

    _mm_store_ps(floatBuffer, cplxValue);
    for(lane = 0; lane < 4; lane++){
      *complexVectorPtr++ = lv_interleave_16ic_round_saturate(floatBuffer[lane]);
    }

    // Interleaves the upper two values in the i and q variables into one buffer
    cplxValue = _mm_unpackhi_ps(iValue, qValue);
    cplxValue = _mm_mul_ps(cplxValue, vScalar);

    _mm_store_ps(floatBuffer, cplxValue);
    for(lane = 0; lane < 4; lane++){
      *complexVectorPtr++ = lv_interleave_16ic_round_saturate(floatBuffer[lane]);
    }

    iBufferPtr += 4;
    qBufferPtr += 4;
  }

  // Scalar tail for the points that did not fill a whole vector
  number = quarterPoints * 4;
  complexVectorPtr = (int16_t*)(&complexVector[number]);
  for(; number < num_points; number++){
    *complexVectorPtr++ = lv_interleave_16ic_round_saturate(*iBufferPtr++ * scalar);
    *complexVectorPtr++ = lv_interleave_16ic_round_saturate(*qBufferPtr++ * scalar);
  }

}
#endif /* LV_HAVE_SSE */

#if LV_HAVE_GENERIC
/*!
  \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.

  Portable reference implementation. Each output value is rounded and
  saturated the same way as in the SIMD implementations in this header.

  \param complexVector The complex output vector
  \param iBuffer The I buffer data to be interleaved
  \param qBuffer The Q buffer data to be interleaved
  \param scalar The scaling value being multiplied against each data point
  \param num_points The number of complex data values to be interleaved
*/
static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
  int16_t* complexVectorPtr = (int16_t*)complexVector;
  const float* iBufferPtr = iBuffer;
  const float* qBufferPtr = qBuffer;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    *complexVectorPtr++ = lv_interleave_16ic_round_saturate(*iBufferPtr++ * scalar);
    *complexVectorPtr++ = lv_interleave_16ic_round_saturate(*qBufferPtr++ * scalar);
  }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H */