1 #ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
2 #define INCLUDED_volk_32f_s32f_convert_8i_u_H
/*!
  \brief Multiplies each input float by scalar and converts the product to a
         signed 8-bit integer, saturating to [-128, 127].
  \param outputVector Receives num_points int8_t results
  \param inputVector  The num_points floats to convert
  \param scalar       Factor applied to every input value before conversion
  \param num_points   Number of elements to convert
  \note Both buffers are accessed with unaligned loads/stores.
  \note Conversion uses the SSE float->int32 conversion, which rounds according
        to the current MXCSR rounding mode (round-to-nearest-even by default).
*/
static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;

    const __m128 vScalar = _mm_set_ps1(scalar);
    const __m128 vMinVal = _mm_set_ps1(-128.0f);
    const __m128 vMaxVal = _mm_set_ps1(127.0f);
    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;

    /* Main loop: 16 floats in, 16 int8_t out per iteration. */
    for (; number < sixteenthPoints; number++) {
        inputVal1 = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar);
        inputVal2 = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr + 4), vScalar);
        inputVal3 = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr + 8), vScalar);
        inputVal4 = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr + 12), vScalar);
        inputVectorPtr += 16;

        /* Clamp while still in the float domain: _mm_cvtps_epi32 yields
         * 0x80000000 for values that do not fit in an int32, which the
         * saturating packs below would otherwise turn into -128 even for
         * large positive products. */
        inputVal1 = _mm_max_ps(_mm_min_ps(inputVal1, vMaxVal), vMinVal);
        inputVal2 = _mm_max_ps(_mm_min_ps(inputVal2, vMaxVal), vMinVal);
        inputVal3 = _mm_max_ps(_mm_min_ps(inputVal3, vMaxVal), vMinVal);
        inputVal4 = _mm_max_ps(_mm_min_ps(inputVal4, vMaxVal), vMinVal);

        intInputVal1 = _mm_cvtps_epi32(inputVal1);
        intInputVal2 = _mm_cvtps_epi32(inputVal2);
        intInputVal3 = _mm_cvtps_epi32(inputVal3);
        intInputVal4 = _mm_cvtps_epi32(inputVal4);

        /* Narrow 4 x (4 x int32) -> 2 x (8 x int16) -> 16 x int8. */
        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
        intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
        intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);

        _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
        outputVectorPtr += 16;
    }

    /* Tail: run the leftover (< 16) elements through the scalar forms of the
     * same instructions so an element's result does not depend on whether it
     * landed in the vector loop or here. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        __m128 tailVal = _mm_mul_ss(_mm_load_ss(inputVector + number), vScalar);
        tailVal = _mm_max_ss(_mm_min_ss(tailVal, vMaxVal), vMinVal);
        outputVector[number] = (int8_t)_mm_cvtss_si32(tailVal);
    }
}
56 #include <xmmintrin.h>
/*!
  \brief Multiplies each input float by scalar and converts the product to a
         signed 8-bit integer, saturating to [-128, 127].
  \param outputVector Receives num_points int8_t results
  \param inputVector  The num_points floats to convert
  \param scalar       Factor applied to every input value before conversion
  \param num_points   Number of elements to convert
  \note The input is read with unaligned loads.
  \note In-range products are truncated toward zero by the float->int8_t cast.
        NOTE(review): the SSE2 kernel in this file rounds to nearest instead;
        confirm which rounding is intended across kernels.
*/
static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector,
                                                  const float* inputVector,
                                                  const float scalar,
                                                  unsigned int num_points)
{
    unsigned int number = 0;
    unsigned int lane;
    const unsigned int quarterPoints = num_points / 4;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;

    const __m128 vScalar = _mm_set_ps1(scalar);
    const __m128 vMinVal = _mm_set_ps1(-128.0f);
    const __m128 vMaxVal = _mm_set_ps1(127.0f);
    __m128 ret;

    /* Scratch area for moving one vector back to scalar code; written with an
     * unaligned store so it needs no special alignment. */
    float outputFloatBuffer[4];

    for (; number < quarterPoints; number++) {
        ret = _mm_loadu_ps(inputVectorPtr);
        inputVectorPtr += 4; /* advance to the next group of four inputs */

        ret = _mm_mul_ps(ret, vScalar);

        /* Saturate before the cast: converting a float outside the int8_t
         * range to int8_t is undefined behaviour in C. */
        ret = _mm_max_ps(_mm_min_ps(ret, vMaxVal), vMinVal);

        _mm_storeu_ps(outputFloatBuffer, ret);
        for (lane = 0; lane < 4; lane++) {
            *outputVectorPtr++ = (int8_t)(outputFloatBuffer[lane]);
        }
    }

    /* Tail: the up-to-three elements that do not fill a whole vector. The
     * ternaries mirror min-then-max above (a NaN compares false and therefore
     * maps to the upper bound). */
    for (number = quarterPoints * 4; number < num_points; number++) {
        float scaled = inputVector[number] * scalar;
        scaled = (scaled < 127.0f) ? scaled : 127.0f;
        scaled = (scaled > -128.0f) ? scaled : -128.0f;
        outputVector[number] = (int8_t)scaled;
    }
}
97 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each input float by scalar and converts the product to a
         signed 8-bit integer, saturating to [-128, 127].
  \param outputVector Receives num_points int8_t results
  \param inputVector  The num_points floats to convert
  \param scalar       Factor applied to every input value before conversion
  \param num_points   Number of elements to convert
  \note In-range products are truncated toward zero by the float->int8_t cast.
        NOTE(review): the SSE2 kernel in this file rounds to nearest instead;
        confirm which rounding is intended across kernels.
*/
static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector,
                                                      const float* inputVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        float scaled = inputVector[number] * scalar;

        /* Saturate before the cast: converting a float outside the int8_t
         * range to int8_t is undefined behaviour in C. Written so that a NaN
         * (which compares false) deterministically maps to the upper bound. */
        scaled = (scaled < 127.0f) ? scaled : 127.0f;
        scaled = (scaled > -128.0f) ? scaled : -128.0f;

        outputVector[number] = (int8_t)scaled;
    }
}