1 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
2 #define INCLUDED_volk_32f_s32f_convert_16i_a_H
10 #include <emmintrin.h>
18 static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
19 unsigned int number = 0;
21 const unsigned int eighthPoints = num_points / 8;
23 const float* inputVectorPtr = (
const float*)inputVector;
24 int16_t* outputVectorPtr = outputVector;
26 float min_val = -32768;
27 float max_val = 32767;
30 __m128 vScalar = _mm_set_ps1(scalar);
31 __m128 inputVal1, inputVal2;
32 __m128i intInputVal1, intInputVal2;
34 __m128 vmin_val = _mm_set_ps1(min_val);
35 __m128 vmax_val = _mm_set_ps1(max_val);
37 for(;number < eighthPoints; number++){
38 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
39 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
42 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
43 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
45 intInputVal1 = _mm_cvtps_epi32(ret1);
46 intInputVal2 = _mm_cvtps_epi32(ret2);
48 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
50 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
54 number = eighthPoints * 8;
55 for(; number < num_points; number++){
56 r = inputVector[number] * scalar;
61 outputVector[number] = (int16_t)rintf(r);
67 #include <xmmintrin.h>
75 static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
76 unsigned int number = 0;
78 const unsigned int quarterPoints = num_points / 4;
80 const float* inputVectorPtr = (
const float*)inputVector;
81 int16_t* outputVectorPtr = outputVector;
83 float min_val = -32768;
84 float max_val = 32767;
87 __m128 vScalar = _mm_set_ps1(scalar);
89 __m128 vmin_val = _mm_set_ps1(min_val);
90 __m128 vmax_val = _mm_set_ps1(max_val);
94 for(;number < quarterPoints; number++){
95 ret = _mm_load_ps(inputVectorPtr);
99 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
101 _mm_store_ps(outputFloatBuffer, ret);
102 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
103 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
104 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
105 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
108 number = quarterPoints * 4;
109 for(; number < num_points; number++){
110 r = inputVector[number] * scalar;
115 outputVector[number] = (int16_t)rintf(r);
120 #ifdef LV_HAVE_GENERIC
128 static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
129 int16_t* outputVectorPtr = outputVector;
130 const float* inputVectorPtr = inputVector;
131 unsigned int number = 0;
132 float min_val = -32768;
133 float max_val = 32767;
136 for(number = 0; number < num_points; number++){
137 r = *inputVectorPtr++ * scalar;
142 *outputVectorPtr++ = (int16_t)rintf(r);