GNU Radio 3.5.3.1 C++ API
volk_32f_x2_multiply_32f_u.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
2 #define INCLUDED_volk_32f_x2_multiply_32f_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE
8 #include <xmmintrin.h>
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector (SSE, unaligned loads/stores)
  \param cVector Destination buffer that receives the products
  \param aVector First vector of factors
  \param bVector Second vector of factors
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of four that does not exceed num_points. */
  const unsigned int vectorizedCount = (num_points / 4) * 4;
  unsigned int idx;

  /* Main loop: four products per iteration. Both operands are loaded
     before the result is stored, one group of four at a time. */
  for(idx = 0; idx < vectorizedCount; idx += 4){
    const __m128 lhs = _mm_loadu_ps(aVector + idx);
    const __m128 rhs = _mm_loadu_ps(bVector + idx);
    _mm_storeu_ps(cVector + idx, _mm_mul_ps(lhs, rhs));
  }

  /* Tail: the remaining 0-3 elements are multiplied one at a time. */
  for(; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
44 #endif /* LV_HAVE_SSE */
45 
46 #ifdef LV_HAVE_AVX
47 #include <immintrin.h>
48 /*!
49  \brief Multiplies the two input vectors and store their results in the third vector
50  \param cVector The vector where the results will be stored
51  \param aVector One of the vectors to be multiplied
52  \param bVector One of the vectors to be multiplied
53  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
54 */
55 static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
56  unsigned int number = 0;
57  const unsigned int eighthPoints = num_points / 8;
58 
59  float* cPtr = cVector;
60  const float* aPtr = aVector;
61  const float* bPtr= bVector;
62 
63  __m256 aVal, bVal, cVal;
64  for(;number < eighthPoints; number++){
65 
66  aVal = _mm256_loadu_ps(aPtr);
67  bVal = _mm256_loadu_ps(bPtr);
68 
69  cVal = _mm256_mul_ps(aVal, bVal);
70 
71  _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
72 
73  aPtr += 8;
74  bPtr += 8;
75  cPtr += 8;
76  }
77 
78  number = eighthPoints * 8;
79  for(;number < num_points; number++){
80  *cPtr++ = (*aPtr++) * (*bPtr++);
81  }
82 }
83 #endif /* LV_HAVE_AVX */
84 
85 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies two float vectors element by element and stores the products in a third vector (portable scalar version)
  \param cVector Destination buffer that receives the products
  \param aVector First vector of factors
  \param bVector Second vector of factors
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int idx;

  /* One product per iteration, in ascending element order. */
  for(idx = 0; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
103 #endif /* LV_HAVE_GENERIC */
104 
105 
106 #endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */