From: Johannes Demel
Date: Tue, 17 Mar 2020 21:07:07 +0000 (+0100)
Subject: [PATCH 5/7] clang-format: Rebase onto current master
X-Git-Tag: archive/raspbian/2.2.1-2+rpi1^2~7
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=cd501c84e3c313f245f24716d0b52d444ac0f96e;p=volk.git

[PATCH 5/7] clang-format: Rebase onto current master

This commit applies clang-format to the latest master branch.

Gbp-Pq: Name 0005-clang-format-Rebase-onto-current-master.patch
---
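The formatting rules visible in the hunks below, in brief: preprocessor
defines sit flush left with no indent after the hash, blocks use four-space
indents, and control keywords take a space before the opening parenthesis.
An illustrative fragment in the new style (hypothetical names, not taken
from this patch):

    #define EXAMPLE_SCALE 2.0f

    static inline void example_copy(float* out, const float* in, unsigned int n)
    {
        unsigned int i = 0;
        for (; i < n; i++) {
            out[i] = in[i] * EXAMPLE_SCALE;
        }
    }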
diff --git a/.github/workflows/check-pr-formatting.yml b/.github/workflows/check-pr-formatting.yml
index b1d2d83..9c7a286 100644
--- a/.github/workflows/check-pr-formatting.yml
+++ b/.github/workflows/check-pr-formatting.yml
@@ -2,6 +2,8 @@ name: Check PR Formatting

 on:
   push:
+    paths-ignore:
+      - 'tmpl/'
   pull_request:
     paths-ignore:
       - 'tmpl/'
@@ -15,5 +17,5 @@ jobs:
     - uses: gnuradio/clang-format-lint-action@v0.5-4
       with:
         source: '.'
-        exclude: './volk'
+        exclude: './tmpl'
         extensions: 'c,cc,cpp,cxx,h,hh'
\ No newline at end of file
diff --git a/include/volk/volk_common.h b/include/volk/volk_common.h
index 7e78358..4e14982 100644
--- a/include/volk/volk_common.h
+++ b/include/volk/volk_common.h
@@ -5,15 +5,15 @@
 // Cross-platform attribute macros
 ////////////////////////////////////////////////////////////////////////
 #if _MSC_VER
-# define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
-# define __VOLK_ATTR_UNUSED
-# define __VOLK_ATTR_INLINE __forceinline
-# define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
-# define __VOLK_ATTR_EXPORT __declspec(dllexport)
-# define __VOLK_ATTR_IMPORT __declspec(dllimport)
-# define __VOLK_PREFETCH(addr)
-# define __VOLK_ASM __asm
-# define __VOLK_VOLATILE
+#define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
+#define __VOLK_ATTR_UNUSED
+#define __VOLK_ATTR_INLINE __forceinline
+#define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
+#define __VOLK_ATTR_EXPORT __declspec(dllexport)
+#define __VOLK_ATTR_IMPORT __declspec(dllimport)
+#define __VOLK_PREFETCH(addr)
+#define __VOLK_ASM __asm
+#define __VOLK_VOLATILE
 #elif defined(__clang__)
 // AppleClang also defines __GNUC__, so do this check first. These
 // will probably be the same as for __GNUC__, but let's keep them
diff --git a/kernels/volk/volk_32f_exp_32f.h b/kernels/volk/volk_32f_exp_32f.h
index 26fdf02..da4ada7 100644
--- a/kernels/volk/volk_32f_exp_32f.h
+++ b/kernels/volk/volk_32f_exp_32f.h
@@ -92,9 +92,9 @@
  * \endcode
  */

-#include <stdio.h>
-#include <math.h>
 #include <inttypes.h>
+#include <math.h>
+#include <stdio.h>

 #ifndef INCLUDED_volk_32f_exp_32f_a_H
 #define INCLUDED_volk_32f_exp_32f_a_H
@@ -105,74 +105,74 @@ static inline void
 volk_32f_exp_32f_a_sse2(float* bVector, const float* aVector, unsigned int num_points)
 {
-  float* bPtr = bVector;
-  const float* aPtr = aVector;
-
-  unsigned int number = 0;
-  unsigned int quarterPoints = num_points / 4;
-
-  // Declare variables and constants
-  __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y;
-  __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2;
-  __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
-  __m128i emm0, pi32_0x7f;
-
-  one = _mm_set1_ps(1.0);
-  exp_hi = _mm_set1_ps(88.3762626647949);
-  exp_lo = _mm_set1_ps(-88.3762626647949);
-  log2EF = _mm_set1_ps(1.44269504088896341);
-  half = _mm_set1_ps(0.5);
-  exp_C1 = _mm_set1_ps(0.693359375);
-  exp_C2 = _mm_set1_ps(-2.12194440e-4);
-  pi32_0x7f = _mm_set1_epi32(0x7f);
-
-  exp_p0 = _mm_set1_ps(1.9875691500e-4);
-  exp_p1 = _mm_set1_ps(1.3981999507e-3);
-  exp_p2 = _mm_set1_ps(8.3334519073e-3);
-  exp_p3 = _mm_set1_ps(4.1665795894e-2);
-  exp_p4 = _mm_set1_ps(1.6666665459e-1);
-  exp_p5 = _mm_set1_ps(5.0000001201e-1);
-
-  for(;number < quarterPoints; number++) {
-    aVal = _mm_load_ps(aPtr);
-    tmp = _mm_setzero_ps();
-
-    aVal = _mm_max_ps(_mm_min_ps(aVal, exp_hi), exp_lo);
-
-    /* express exp(x) as exp(g + n*log(2)) */
-    fx = _mm_add_ps(_mm_mul_ps(aVal, log2EF), half);
-
-    emm0 = _mm_cvttps_epi32(fx);
-    tmp = _mm_cvtepi32_ps(emm0);
-
-    mask = _mm_and_ps(_mm_cmpgt_ps(tmp, fx), one);
-    fx = _mm_sub_ps(tmp, mask);
-
-    tmp = _mm_mul_ps(fx, exp_C1);
-    z = _mm_mul_ps(fx, exp_C2);
-    aVal = _mm_sub_ps(_mm_sub_ps(aVal, tmp), z);
-    z = _mm_mul_ps(aVal, aVal);
-
-    y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(exp_p0, aVal), exp_p1), aVal);
-    y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p2), aVal), exp_p3);
-    y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, aVal), exp_p4), aVal);
-    y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p5), z), aVal);
-    y = _mm_add_ps(y, one);
-
-    emm0 = _mm_slli_epi32(_mm_add_epi32(_mm_cvttps_epi32(fx), pi32_0x7f), 23);
-
-    pow2n = _mm_castsi128_ps(emm0);
-    bVal = _mm_mul_ps(y, pow2n);
-
-    _mm_store_ps(bPtr, bVal);
-    aPtr += 4;
-    bPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++) {
-    *bPtr++ = expf(*aPtr++);
-  }
+    float* bPtr = bVector;
+    const float* aPtr = aVector;
+
+    unsigned int number = 0;
+    unsigned int quarterPoints = num_points / 4;
+
+    // Declare variables and constants
+    __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y;
+    __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2;
+    __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
+    __m128i emm0, pi32_0x7f;
+
+    one = _mm_set1_ps(1.0);
+    exp_hi = _mm_set1_ps(88.3762626647949);
+    exp_lo = _mm_set1_ps(-88.3762626647949);
+    log2EF = _mm_set1_ps(1.44269504088896341);
+    half = _mm_set1_ps(0.5);
+    exp_C1 = _mm_set1_ps(0.693359375);
+    exp_C2 = _mm_set1_ps(-2.12194440e-4);
+    pi32_0x7f = _mm_set1_epi32(0x7f);
+
+    exp_p0 = _mm_set1_ps(1.9875691500e-4);
+    exp_p1 = _mm_set1_ps(1.3981999507e-3);
+    exp_p2 = _mm_set1_ps(8.3334519073e-3);
+    exp_p3 = _mm_set1_ps(4.1665795894e-2);
+    exp_p4 = _mm_set1_ps(1.6666665459e-1);
+    exp_p5 = _mm_set1_ps(5.0000001201e-1);
+
+    for (; number < quarterPoints; number++) {
+        aVal = _mm_load_ps(aPtr);
+        tmp = _mm_setzero_ps();
+
+        aVal = _mm_max_ps(_mm_min_ps(aVal, exp_hi), exp_lo);
+
+        /* express exp(x) as exp(g + n*log(2)) */
+        fx = _mm_add_ps(_mm_mul_ps(aVal, log2EF), half);
+
+        emm0 = _mm_cvttps_epi32(fx);
+        tmp = _mm_cvtepi32_ps(emm0);
+
+        mask = _mm_and_ps(_mm_cmpgt_ps(tmp, fx), one);
+        fx = _mm_sub_ps(tmp, mask);
+
+        tmp = _mm_mul_ps(fx, exp_C1);
+        z = _mm_mul_ps(fx, exp_C2);
+        aVal = _mm_sub_ps(_mm_sub_ps(aVal, tmp), z);
+        z = _mm_mul_ps(aVal, aVal);
+
+        y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(exp_p0, aVal), exp_p1), aVal);
+        y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p2), aVal), exp_p3);
+        y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, aVal), exp_p4), aVal);
+        y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p5), z), aVal);
+        y = _mm_add_ps(y, one);
+
+        emm0 = _mm_slli_epi32(_mm_add_epi32(_mm_cvttps_epi32(fx), pi32_0x7f), 23);
+
+        pow2n = _mm_castsi128_ps(emm0);
+        bVal = _mm_mul_ps(y, pow2n);
+
+        _mm_store_ps(bPtr, bVal);
+        aPtr += 4;
+        bPtr += 4;
+    }
+
+    number = quarterPoints * 4;
+    for (; number < num_points; number++) {
+        *bPtr++ = expf(*aPtr++);
+    }
 }

 #endif /* LV_HAVE_SSE2 for aligned */

@@ -183,13 +183,13 @@ volk_32f_exp_32f_a_sse2(float* bVector, const float* aVector, unsigned int num_p
 static inline void
 volk_32f_exp_32f_a_generic(float* bVector, const float* aVector, unsigned int num_points)
 {
-  float* bPtr = bVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
+    float* bPtr = bVector;
+    const float* aPtr = aVector;
+    unsigned int number = 0;

-  for(number = 0; number < num_points; number++) {
-    *bPtr++ = expf(*aPtr++);
-  }
+    for (number = 0; number < num_points; number++) {
+        *bPtr++ = expf(*aPtr++);
+    }
 }

 #endif /* LV_HAVE_GENERIC */

@@ -205,75 +205,75 @@ static inline void
 volk_32f_exp_32f_u_sse2(float* bVector, const float* aVector, unsigned int num_points)
 {
-  float* bPtr = bVector;
-  const float* aPtr = aVector;
-
-  unsigned int number = 0;
-  unsigned int quarterPoints = num_points / 4;
-
-  // Declare variables and constants
-  __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y;
-  __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2;
-  __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
-  __m128i emm0, pi32_0x7f;
-
-  one = _mm_set1_ps(1.0);
-  exp_hi = _mm_set1_ps(88.3762626647949);
-  exp_lo = _mm_set1_ps(-88.3762626647949);
-  log2EF = _mm_set1_ps(1.44269504088896341);
-  half = _mm_set1_ps(0.5);
-  exp_C1 = _mm_set1_ps(0.693359375);
-  exp_C2 = _mm_set1_ps(-2.12194440e-4);
-  pi32_0x7f = _mm_set1_epi32(0x7f);
-
-  exp_p0 = _mm_set1_ps(1.9875691500e-4);
-  exp_p1 = _mm_set1_ps(1.3981999507e-3);
-  exp_p2 = _mm_set1_ps(8.3334519073e-3);
-  exp_p3 = _mm_set1_ps(4.1665795894e-2);
-  exp_p4 = _mm_set1_ps(1.6666665459e-1);
-  exp_p5 = _mm_set1_ps(5.0000001201e-1);
-
-
-  for(;number < quarterPoints; number++) {
-    aVal = _mm_loadu_ps(aPtr);
-    tmp = _mm_setzero_ps();
-
-    aVal = _mm_max_ps(_mm_min_ps(aVal, exp_hi), exp_lo);
-
-    /* express exp(x) as exp(g + n*log(2)) */
-    fx = _mm_add_ps(_mm_mul_ps(aVal, log2EF), half);
-
-    emm0 = _mm_cvttps_epi32(fx);
-    tmp = _mm_cvtepi32_ps(emm0);
-
-    mask = _mm_and_ps(_mm_cmpgt_ps(tmp, fx), one);
-    fx = _mm_sub_ps(tmp, mask);
-
-    tmp = _mm_mul_ps(fx, exp_C1);
-    z = _mm_mul_ps(fx, exp_C2);
-    aVal = _mm_sub_ps(_mm_sub_ps(aVal, tmp), z);
-    z = _mm_mul_ps(aVal, aVal);
-
-    y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(exp_p0, aVal), exp_p1), aVal);
-    y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p2), aVal), exp_p3);
-    y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, aVal), exp_p4), aVal);
-    y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p5), z), aVal);
-    y = _mm_add_ps(y, one);
-
-    emm0 = _mm_slli_epi32(_mm_add_epi32(_mm_cvttps_epi32(fx), pi32_0x7f), 23);
-
-    pow2n = _mm_castsi128_ps(emm0);
-    bVal = _mm_mul_ps(y, pow2n);
-
-    _mm_storeu_ps(bPtr, bVal);
-    aPtr += 4;
-    bPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++){
-    *bPtr++ = expf(*aPtr++);
-  }
+    float* bPtr = bVector;
+    const float* aPtr = aVector;
+
+    unsigned int number = 0;
+    unsigned int quarterPoints = num_points / 4;
+
+    // Declare variables and constants
+    __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y;
+    __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2;
+    __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
+    __m128i emm0, pi32_0x7f;
+
+    one = _mm_set1_ps(1.0);
+    exp_hi = _mm_set1_ps(88.3762626647949);
+    exp_lo = _mm_set1_ps(-88.3762626647949);
+    log2EF = _mm_set1_ps(1.44269504088896341);
+    half = _mm_set1_ps(0.5);
+    exp_C1 = _mm_set1_ps(0.693359375);
+    exp_C2 = _mm_set1_ps(-2.12194440e-4);
+    pi32_0x7f = _mm_set1_epi32(0x7f);
+
+    exp_p0 = _mm_set1_ps(1.9875691500e-4);
+    exp_p1 = _mm_set1_ps(1.3981999507e-3);
+    exp_p2 = _mm_set1_ps(8.3334519073e-3);
+    exp_p3 = _mm_set1_ps(4.1665795894e-2);
+    exp_p4 = _mm_set1_ps(1.6666665459e-1);
+    exp_p5 = _mm_set1_ps(5.0000001201e-1);
+
+
+    for (; number < quarterPoints; number++) {
+        aVal = _mm_loadu_ps(aPtr);
+        tmp = _mm_setzero_ps();
+
+        aVal = _mm_max_ps(_mm_min_ps(aVal, exp_hi), exp_lo);
+
+        /* express exp(x) as exp(g + n*log(2)) */
+        fx = _mm_add_ps(_mm_mul_ps(aVal, log2EF), half);
+
+        emm0 = _mm_cvttps_epi32(fx);
+        tmp = _mm_cvtepi32_ps(emm0);
+
+        mask = _mm_and_ps(_mm_cmpgt_ps(tmp, fx), one);
+        fx = _mm_sub_ps(tmp, mask);
+
+        tmp = _mm_mul_ps(fx, exp_C1);
+        z = _mm_mul_ps(fx, exp_C2);
+        aVal = _mm_sub_ps(_mm_sub_ps(aVal, tmp), z);
+        z = _mm_mul_ps(aVal, aVal);
+
+        y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(exp_p0, aVal), exp_p1), aVal);
+        y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p2), aVal), exp_p3);
+        y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, aVal), exp_p4), aVal);
+        y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p5), z), aVal);
+        y = _mm_add_ps(y, one);
+
+        emm0 = _mm_slli_epi32(_mm_add_epi32(_mm_cvttps_epi32(fx), pi32_0x7f), 23);
+
+        pow2n = _mm_castsi128_ps(emm0);
+        bVal = _mm_mul_ps(y, pow2n);
+
+        _mm_storeu_ps(bPtr, bVal);
+        aPtr += 4;
+        bPtr += 4;
+    }
+
+    number = quarterPoints * 4;
+    for (; number < num_points; number++) {
+        *bPtr++ = expf(*aPtr++);
+    }
 }

 #endif /* LV_HAVE_SSE2 for unaligned */

@@ -284,13 +284,13 @@ volk_32f_exp_32f_u_sse2(float* bVector, const float* aVector, unsigned int num_p
 static inline void
 volk_32f_exp_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points)
 {
-  float* bPtr = bVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
+    float* bPtr = bVector;
+    const float* aPtr = aVector;
+    unsigned int number = 0;

-  for(number = 0; number < num_points; number++){
-    *bPtr++ = expf(*aPtr++);
-  }
+    for (number = 0; number < num_points; number++) {
+        *bPtr++ = expf(*aPtr++);
+    }
 }

 #endif /* LV_HAVE_GENERIC */
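For reference (not part of the change): the SSE2 kernels above vectorize a
Cephes-style expf() approximation. A scalar sketch of the same scheme, using
the constants from the kernels; the name exp_sketch and this standalone
formulation are illustrative, not VOLK API:

    #include <math.h>
    #include <stdint.h>

    static float exp_sketch(float x)
    {
        /* Clamp to the range where expf(x) is finite and non-zero. */
        if (x > 88.3762626647949f)
            x = 88.3762626647949f;
        if (x < -88.3762626647949f)
            x = -88.3762626647949f;

        /* Range reduction: exp(x) = 2^n * exp(g) with n = round(x / ln(2)).
         * n is computed as floor(x * log2(e) + 0.5); the kernels emulate
         * floorf() with a truncating convert plus a compare-derived fixup. */
        float n = floorf(x * 1.44269504088896341f + 0.5f);

        /* g = x - n * ln(2); ln(2) is split into a coarse and a fine part
         * (exp_C1 + exp_C2) so the subtraction stays accurate. */
        float g = x - n * 0.693359375f - n * (-2.12194440e-4f);

        /* Degree-5 Horner polynomial P(g); then exp(g) ~= 1 + g + g*g*P(g). */
        float y = 1.9875691500e-4f;
        y = y * g + 1.3981999507e-3f;
        y = y * g + 8.3334519073e-3f;
        y = y * g + 4.1665795894e-2f;
        y = y * g + 1.6666665459e-1f;
        y = y * g + 5.0000001201e-1f;
        y = y * (g * g) + g + 1.0f;

        /* Build 2^n by writing n + 127 into the exponent field of an
         * IEEE-754 single; the kernels do this with _mm_slli_epi32 and
         * _mm_castsi128_ps. */
        union { uint32_t u; float f; } pow2n;
        pow2n.u = (uint32_t)((int32_t)n + 0x7f) << 23;
        return y * pow2n.f;
    }

The kernels carry out exactly these steps four lanes at a time; the tail
loops fall back to expf() for the remaining num_points % 4 elements.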