[PATCH 09/73] Code cleanup

author Zlika <zlika_ese@hotmail.com>

Mon, 5 Jul 2021 11:05:18 +0000 (13:05 +0200)

committer A. Maitland Bottoms <bottoms@debian.org>

Fri, 22 Oct 2021 03:30:05 +0000 (04:30 +0100)
author Zlika <zlika_ese@hotmail.com>
Mon, 5 Jul 2021 11:05:18 +0000 (13:05 +0200)
committer A. Maitland Bottoms <bottoms@debian.org>
Fri, 22 Oct 2021 03:30:05 +0000 (04:30 +0100)
diff --git a/kernels/volk/volk_32f_index_min_16u.h b/kernels/volk/volk_32f_index_min_16u.h

index 115835ed4609877bdf11743415cdeb6ab86b2916..00acd855ae4546ad24d0c9c3f2cb6971348b199d 100644 (file)
--- a/kernels/volk/volk_32f_index_min_16u.h
+++ b/kernels/volk/volk_32f_index_min_16u.h
@@ -2,14 +2,14 @@
  /*
   * Copyright 2021 Free Software Foundation, Inc.
   *
- * This file is part of GNU Radio
+ * This file is part of VOLK
   *
- * GNU Radio is free software; you can redistribute it and/or modify
+ * VOLK is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 3, or (at your option)
   * any later version.
   *
- * GNU Radio is distributed in the hope that it will be useful,
+ * VOLK is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
diff --git a/kernels/volk/volk_32f_index_min_32u.h b/kernels/volk/volk_32f_index_min_32u.h

index a68ba9c6cae9fbbe74524debf3c1f37d403a724b..c71ee60835334b92cb8f3f916cd05473f1e5bee1 100644 (file)
--- a/kernels/volk/volk_32f_index_min_32u.h
+++ b/kernels/volk/volk_32f_index_min_32u.h
@@ -2,14 +2,14 @@
  /*
   * Copyright 2021 Free Software Foundation, Inc.
   *
- * This file is part of GNU Radio
+ * This file is part of VOLK
   *
- * GNU Radio is free software; you can redistribute it and/or modify
+ * VOLK is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 3, or (at your option)
   * any later version.
   *
- * GNU Radio is distributed in the hope that it will be useful,
+ * VOLK is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
@@ -76,59 +76,57 @@ static inline void volk_32f_index_min_32u_a_sse4_1(uint32_t* target,
                                                     const float* source,
                                                     uint32_t num_points)
  {
-    if (num_points > 0) {
-        const uint32_t quarterPoints = num_points / 4;
+    const uint32_t quarterPoints = num_points / 4;
  
-        float* inputPtr = (float*)source;
+    float* inputPtr = (float*)source;
  
-        __m128 indexIncrementValues = _mm_set1_ps(4);
-        __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
+    __m128 indexIncrementValues = _mm_set1_ps(4);
+    __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
  
-        float min = source[0];
-        float index = 0;
-        __m128 minValues = _mm_set1_ps(min);
-        __m128 minValuesIndex = _mm_setzero_ps();
-        __m128 compareResults;
-        __m128 currentValues;
+    float min = source[0];
+    float index = 0;
+    __m128 minValues = _mm_set1_ps(min);
+    __m128 minValuesIndex = _mm_setzero_ps();
+    __m128 compareResults;
+    __m128 currentValues;
  
-        __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
-        __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
  
-        for (uint32_t number = 0; number < quarterPoints; number++) {
+    for (uint32_t number = 0; number < quarterPoints; number++) {
  
-            currentValues = _mm_load_ps(inputPtr);
-            inputPtr += 4;
-            currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
+        currentValues = _mm_load_ps(inputPtr);
+        inputPtr += 4;
+        currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
  
-            compareResults = _mm_cmplt_ps(currentValues, minValues);
+        compareResults = _mm_cmplt_ps(currentValues, minValues);
  
-            minValuesIndex =
-                _mm_blendv_ps(minValuesIndex, currentIndexes, compareResults);
-            minValues = _mm_blendv_ps(minValues, currentValues, compareResults);
-        }
+        minValuesIndex =
+            _mm_blendv_ps(minValuesIndex, currentIndexes, compareResults);
+        minValues = _mm_blendv_ps(minValues, currentValues, compareResults);
+    }
  
-        // Calculate the smallest value from the remaining 4 points
-        _mm_store_ps(minValuesBuffer, minValues);
-        _mm_store_ps(minIndexesBuffer, minValuesIndex);
+    // Calculate the smallest value from the remaining 4 points
+    _mm_store_ps(minValuesBuffer, minValues);
+    _mm_store_ps(minIndexesBuffer, minValuesIndex);
  
-        for (uint32_t number = 0; number < 4; number++) {
-            if (minValuesBuffer[number] < min) {
+    for (uint32_t number = 0; number < 4; number++) {
+        if (minValuesBuffer[number] < min) {
+            index = minIndexesBuffer[number];
+            min = minValuesBuffer[number];
+        } else if (minValuesBuffer[number] == min) {
+            if (index > minIndexesBuffer[number])
                  index = minIndexesBuffer[number];
-                min = minValuesBuffer[number];
-            } else if (minValuesBuffer[number] == min) {
-                if (index > minIndexesBuffer[number])
-                    index = minIndexesBuffer[number];
-            }
          }
+    }
  
-        for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
-            if (source[number] < min) {
-                index = number;
-                min = source[number];
-            }
+    for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
+        if (source[number] < min) {
+            index = number;
+            min = source[number];
          }
-        target[0] = (uint32_t)index;
      }
+    target[0] = (uint32_t)index;
  }
  
  #endif /*LV_HAVE_SSE4_1*/
@@ -141,61 +139,59 @@ static inline void volk_32f_index_min_32u_a_sse4_1(uint32_t* target,
  static inline void
  volk_32f_index_min_32u_a_sse(uint32_t* target, const float* source, uint32_t num_points)
  {
-    if (num_points > 0) {
-        const uint32_t quarterPoints = num_points / 4;
+    const uint32_t quarterPoints = num_points / 4;
  
-        float* inputPtr = (float*)source;
+    float* inputPtr = (float*)source;
  
-        __m128 indexIncrementValues = _mm_set1_ps(4);
-        __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
+    __m128 indexIncrementValues = _mm_set1_ps(4);
+    __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
  
-        float min = source[0];
-        float index = 0;
-        __m128 minValues = _mm_set1_ps(min);
-        __m128 minValuesIndex = _mm_setzero_ps();
-        __m128 compareResults;
-        __m128 currentValues;
+    float min = source[0];
+    float index = 0;
+    __m128 minValues = _mm_set1_ps(min);
+    __m128 minValuesIndex = _mm_setzero_ps();
+    __m128 compareResults;
+    __m128 currentValues;
  
-        __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
-        __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
  
-        for (uint32_t number = 0; number < quarterPoints; number++) {
+    for (uint32_t number = 0; number < quarterPoints; number++) {
  
-            currentValues = _mm_load_ps(inputPtr);
-            inputPtr += 4;
-            currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
+        currentValues = _mm_load_ps(inputPtr);
+        inputPtr += 4;
+        currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
  
-            compareResults = _mm_cmplt_ps(currentValues, minValues);
+        compareResults = _mm_cmplt_ps(currentValues, minValues);
  
-            minValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, currentIndexes),
-                                       _mm_andnot_ps(compareResults, minValuesIndex));
+        minValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, currentIndexes),
+                                   _mm_andnot_ps(compareResults, minValuesIndex));
  
-            minValues = _mm_or_ps(_mm_and_ps(compareResults, currentValues),
-                                  _mm_andnot_ps(compareResults, minValues));
-        }
+        minValues = _mm_or_ps(_mm_and_ps(compareResults, currentValues),
+                              _mm_andnot_ps(compareResults, minValues));
+    }
  
-        // Calculate the smallest value from the remaining 4 points
-        _mm_store_ps(minValuesBuffer, minValues);
-        _mm_store_ps(minIndexesBuffer, minValuesIndex);
+    // Calculate the smallest value from the remaining 4 points
+    _mm_store_ps(minValuesBuffer, minValues);
+    _mm_store_ps(minIndexesBuffer, minValuesIndex);
  
-        for (uint32_t number = 0; number < 4; number++) {
-            if (minValuesBuffer[number] < min) {
+    for (uint32_t number = 0; number < 4; number++) {
+        if (minValuesBuffer[number] < min) {
+            index = minIndexesBuffer[number];
+            min = minValuesBuffer[number];
+        } else if (minValuesBuffer[number] == min) {
+            if (index > minIndexesBuffer[number])
                  index = minIndexesBuffer[number];
-                min = minValuesBuffer[number];
-            } else if (minValuesBuffer[number] == min) {
-                if (index > minIndexesBuffer[number])
-                    index = minIndexesBuffer[number];
-            }
          }
+    }
  
-        for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
-            if (source[number] < min) {
-                index = number;
-                min = source[number];
-            }
+    for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
+        if (source[number] < min) {
+            index = number;
+            min = source[number];
          }
-        target[0] = (uint32_t)index;
      }
+    target[0] = (uint32_t)index;
  }
  
  #endif /*LV_HAVE_SSE*/
@@ -207,56 +203,54 @@ volk_32f_index_min_32u_a_sse(uint32_t* target, const float* source, uint32_t num
  static inline void
  volk_32f_index_min_32u_a_avx(uint32_t* target, const float* source, uint32_t num_points)
  {
-    if (num_points > 0) {
-        const uint32_t quarterPoints = num_points / 8;
-
-        float* inputPtr = (float*)source;
-
-        __m256 indexIncrementValues = _mm256_set1_ps(8);
-        __m256 currentIndexes = _mm256_set_ps(-1, -2, -3, -4, -5, -6, -7, -8);
-
-        float min = source[0];
-        float index = 0;
-        __m256 minValues = _mm256_set1_ps(min);
-        __m256 minValuesIndex = _mm256_setzero_ps();
-        __m256 compareResults;
-        __m256 currentValues;
-
-        __VOLK_ATTR_ALIGNED(32) float minValuesBuffer[8];
-        __VOLK_ATTR_ALIGNED(32) float minIndexesBuffer[8];
-
-        for (uint32_t number = 0; number < quarterPoints; number++) {
-            currentValues = _mm256_load_ps(inputPtr);
-            inputPtr += 8;
-            currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
-            compareResults = _mm256_cmp_ps(currentValues, minValues, _CMP_LT_OS);
-            minValuesIndex =
-                _mm256_blendv_ps(minValuesIndex, currentIndexes, compareResults);
-            minValues = _mm256_blendv_ps(minValues, currentValues, compareResults);
-        }
+    const uint32_t quarterPoints = num_points / 8;
+
+    float* inputPtr = (float*)source;
+
+    __m256 indexIncrementValues = _mm256_set1_ps(8);
+    __m256 currentIndexes = _mm256_set_ps(-1, -2, -3, -4, -5, -6, -7, -8);
+
+    float min = source[0];
+    float index = 0;
+    __m256 minValues = _mm256_set1_ps(min);
+    __m256 minValuesIndex = _mm256_setzero_ps();
+    __m256 compareResults;
+    __m256 currentValues;
+
+    __VOLK_ATTR_ALIGNED(32) float minValuesBuffer[8];
+    __VOLK_ATTR_ALIGNED(32) float minIndexesBuffer[8];
+
+    for (uint32_t number = 0; number < quarterPoints; number++) {
+        currentValues = _mm256_load_ps(inputPtr);
+        inputPtr += 8;
+        currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
+        compareResults = _mm256_cmp_ps(currentValues, minValues, _CMP_LT_OS);
+        minValuesIndex =
+            _mm256_blendv_ps(minValuesIndex, currentIndexes, compareResults);
+        minValues = _mm256_blendv_ps(minValues, currentValues, compareResults);
+    }
  
-        // Calculate the smallest value from the remaining 8 points
-        _mm256_store_ps(minValuesBuffer, minValues);
-        _mm256_store_ps(minIndexesBuffer, minValuesIndex);
+    // Calculate the smallest value from the remaining 8 points
+    _mm256_store_ps(minValuesBuffer, minValues);
+    _mm256_store_ps(minIndexesBuffer, minValuesIndex);
  
-        for (uint32_t number = 0; number < 8; number++) {
-            if (minValuesBuffer[number] < min) {
+    for (uint32_t number = 0; number < 8; number++) {
+        if (minValuesBuffer[number] < min) {
+            index = minIndexesBuffer[number];
+            min = minValuesBuffer[number];
+        } else if (minValuesBuffer[number] == min) {
+            if (index > minIndexesBuffer[number])
                  index = minIndexesBuffer[number];
-                min = minValuesBuffer[number];
-            } else if (minValuesBuffer[number] == min) {
-                if (index > minIndexesBuffer[number])
-                    index = minIndexesBuffer[number];
-            }
          }
+    }
  
-        for (uint32_t number = quarterPoints * 8; number < num_points; number++) {
-            if (source[number] < min) {
-                index = number;
-                min = source[number];
-            }
+    for (uint32_t number = quarterPoints * 8; number < num_points; number++) {
+        if (source[number] < min) {
+            index = number;
+            min = source[number];
          }
-        target[0] = (uint32_t)index;
      }
+    target[0] = (uint32_t)index;
  }
  
  #endif /*LV_HAVE_AVX*/
@@ -268,58 +262,56 @@ volk_32f_index_min_32u_a_avx(uint32_t* target, const float* source, uint32_t num
  static inline void
  volk_32f_index_min_32u_neon(uint32_t* target, const float* source, uint32_t num_points)
  {
-    if (num_points > 0) {
-        const uint32_t quarterPoints = num_points / 4;
-
-        float* inputPtr = (float*)source;
-        float32x4_t indexIncrementValues = vdupq_n_f32(4);
-        __VOLK_ATTR_ALIGNED(16)
-        float currentIndexes_float[4] = { -4.0f, -3.0f, -2.0f, -1.0f };
-        float32x4_t currentIndexes = vld1q_f32(currentIndexes_float);
-
-        float min = source[0];
-        float index = 0;
-        float32x4_t minValues = vdupq_n_f32(min);
-        uint32x4_t minValuesIndex = vmovq_n_u32(0);
-        uint32x4_t compareResults;
-        uint32x4_t currentIndexes_u;
-        float32x4_t currentValues;
-
-        __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
-        __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
-
-        for (uint32_t number = 0; number < quarterPoints; number++) {
-            currentValues = vld1q_f32(inputPtr);
-            inputPtr += 4;
-            currentIndexes = vaddq_f32(currentIndexes, indexIncrementValues);
-            currentIndexes_u = vcvtq_u32_f32(currentIndexes);
-            compareResults = vcgeq_f32(currentValues, minValues);
-            minValuesIndex = vorrq_u32(vandq_u32(compareResults, minValuesIndex),
-                                       vbicq_u32(currentIndexes_u, compareResults));
-            minValues = vminq_f32(currentValues, minValues);
-        }
+    const uint32_t quarterPoints = num_points / 4;
+
+    float* inputPtr = (float*)source;
+    float32x4_t indexIncrementValues = vdupq_n_f32(4);
+    __VOLK_ATTR_ALIGNED(16)
+    float currentIndexes_float[4] = { -4.0f, -3.0f, -2.0f, -1.0f };
+    float32x4_t currentIndexes = vld1q_f32(currentIndexes_float);
+
+    float min = source[0];
+    float index = 0;
+    float32x4_t minValues = vdupq_n_f32(min);
+    uint32x4_t minValuesIndex = vmovq_n_u32(0);
+    uint32x4_t compareResults;
+    uint32x4_t currentIndexes_u;
+    float32x4_t currentValues;
+
+    __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
+
+    for (uint32_t number = 0; number < quarterPoints; number++) {
+        currentValues = vld1q_f32(inputPtr);
+        inputPtr += 4;
+        currentIndexes = vaddq_f32(currentIndexes, indexIncrementValues);
+        currentIndexes_u = vcvtq_u32_f32(currentIndexes);
+        compareResults = vcgeq_f32(currentValues, minValues);
+        minValuesIndex = vorrq_u32(vandq_u32(compareResults, minValuesIndex),
+                                   vbicq_u32(currentIndexes_u, compareResults));
+        minValues = vminq_f32(currentValues, minValues);
+    }
  
-        // Calculate the smallest value from the remaining 4 points
-        vst1q_f32(minValuesBuffer, minValues);
-        vst1q_f32(minIndexesBuffer, vcvtq_f32_u32(minValuesIndex));
-        for (uint32_t number = 0; number < 4; number++) {
-            if (minValuesBuffer[number] < min) {
+    // Calculate the smallest value from the remaining 4 points
+    vst1q_f32(minValuesBuffer, minValues);
+    vst1q_f32(minIndexesBuffer, vcvtq_f32_u32(minValuesIndex));
+    for (uint32_t number = 0; number < 4; number++) {
+        if (minValuesBuffer[number] < min) {
+            index = minIndexesBuffer[number];
+            min = minValuesBuffer[number];
+        } else if (minValues[number] == min) {
+            if (index > minIndexesBuffer[number])
                  index = minIndexesBuffer[number];
-                min = minValuesBuffer[number];
-            } else if (minValues[number] == min) {
-                if (index > minIndexesBuffer[number])
-                    index = minIndexesBuffer[number];
-            }
          }
+    }
  
-        for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
-            if (source[number] < min) {
-                index = number;
-                min = source[number];
-            }
+    for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
+        if (source[number] < min) {
+            index = number;
+            min = source[number];
          }
-        target[0] = (uint32_t)index;
      }
+    target[0] = (uint32_t)index;
  }
  
  #endif /*LV_HAVE_NEON*/
@@ -330,18 +322,16 @@ volk_32f_index_min_32u_neon(uint32_t* target, const float* source, uint32_t num_
  static inline void
  volk_32f_index_min_32u_generic(uint32_t* target, const float* source, uint32_t num_points)
  {
-    if (num_points > 0) {
-        float min = source[0];
-        uint32_t index = 0;
-
-        for (uint32_t i = 1; i < num_points; ++i) {
-            if (source[i] < min) {
-                index = i;
-                min = source[i];
-            }
+    float min = source[0];
+    uint32_t index = 0;
+
+    for (uint32_t i = 1; i < num_points; ++i) {
+        if (source[i] < min) {
+            index = i;
+            min = source[i];
          }
-        target[0] = index;
      }
+    target[0] = index;
  }
  
  #endif /*LV_HAVE_GENERIC*/
@@ -364,56 +354,54 @@ volk_32f_index_min_32u_generic(uint32_t* target, const float* source, uint32_t n
  static inline void
  volk_32f_index_min_32u_u_avx(uint32_t* target, const float* source, uint32_t num_points)
  {
-    if (num_points > 0) {
-        const uint32_t quarterPoints = num_points / 8;
-
-        float* inputPtr = (float*)source;
-
-        __m256 indexIncrementValues = _mm256_set1_ps(8);
-        __m256 currentIndexes = _mm256_set_ps(-1, -2, -3, -4, -5, -6, -7, -8);
-
-        float min = source[0];
-        float index = 0;
-        __m256 minValues = _mm256_set1_ps(min);
-        __m256 minValuesIndex = _mm256_setzero_ps();
-        __m256 compareResults;
-        __m256 currentValues;
-
-        __VOLK_ATTR_ALIGNED(32) float minValuesBuffer[8];
-        __VOLK_ATTR_ALIGNED(32) float minIndexesBuffer[8];
-
-        for (uint32_t number = 0; number < quarterPoints; number++) {
-            currentValues = _mm256_loadu_ps(inputPtr);
-            inputPtr += 8;
-            currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
-            compareResults = _mm256_cmp_ps(currentValues, minValues, _CMP_LT_OS);
-            minValuesIndex =
-                _mm256_blendv_ps(minValuesIndex, currentIndexes, compareResults);
-            minValues = _mm256_blendv_ps(minValues, currentValues, compareResults);
-        }
+    const uint32_t quarterPoints = num_points / 8;
+
+    float* inputPtr = (float*)source;
+
+    __m256 indexIncrementValues = _mm256_set1_ps(8);
+    __m256 currentIndexes = _mm256_set_ps(-1, -2, -3, -4, -5, -6, -7, -8);
+
+    float min = source[0];
+    float index = 0;
+    __m256 minValues = _mm256_set1_ps(min);
+    __m256 minValuesIndex = _mm256_setzero_ps();
+    __m256 compareResults;
+    __m256 currentValues;
+
+    __VOLK_ATTR_ALIGNED(32) float minValuesBuffer[8];
+    __VOLK_ATTR_ALIGNED(32) float minIndexesBuffer[8];
+
+    for (uint32_t number = 0; number < quarterPoints; number++) {
+        currentValues = _mm256_loadu_ps(inputPtr);
+        inputPtr += 8;
+        currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
+        compareResults = _mm256_cmp_ps(currentValues, minValues, _CMP_LT_OS);
+        minValuesIndex =
+            _mm256_blendv_ps(minValuesIndex, currentIndexes, compareResults);
+        minValues = _mm256_blendv_ps(minValues, currentValues, compareResults);
+    }
  
-        // Calculate the smalles value from the remaining 8 points
-        _mm256_store_ps(minValuesBuffer, minValues);
-        _mm256_store_ps(minIndexesBuffer, minValuesIndex);
+    // Calculate the smalles value from the remaining 8 points
+    _mm256_store_ps(minValuesBuffer, minValues);
+    _mm256_store_ps(minIndexesBuffer, minValuesIndex);
  
-        for (uint32_t number = 0; number < 8; number++) {
-            if (minValuesBuffer[number] < min) {
+    for (uint32_t number = 0; number < 8; number++) {
+        if (minValuesBuffer[number] < min) {
+            index = minIndexesBuffer[number];
+            min = minValuesBuffer[number];
+        } else if (minValuesBuffer[number] == min) {
+            if (index > minIndexesBuffer[number])
                  index = minIndexesBuffer[number];
-                min = minValuesBuffer[number];
-            } else if (minValuesBuffer[number] == min) {
-                if (index > minIndexesBuffer[number])
-                    index = minIndexesBuffer[number];
-            }
          }
+    }
  
-        for (uint32_t number = quarterPoints * 8; number < num_points; number++) {
-            if (source[number] < min) {
-                index = number;
-                min = source[number];
-            }
+    for (uint32_t number = quarterPoints * 8; number < num_points; number++) {
+        if (source[number] < min) {
+            index = number;
+            min = source[number];
          }
-        target[0] = (uint32_t)index;
      }
+    target[0] = (uint32_t)index;
  }
  
  #endif /*LV_HAVE_AVX*/
@@ -426,56 +414,54 @@ static inline void volk_32f_index_min_32u_u_sse4_1(uint32_t* target,
                                                     const float* source,
                                                     uint32_t num_points)
  {
-    if (num_points > 0) {
-        const uint32_t quarterPoints = num_points / 4;
-
-        float* inputPtr = (float*)source;
-
-        __m128 indexIncrementValues = _mm_set1_ps(4);
-        __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
-
-        float min = source[0];
-        float index = 0;
-        __m128 minValues = _mm_set1_ps(min);
-        __m128 minValuesIndex = _mm_setzero_ps();
-        __m128 compareResults;
-        __m128 currentValues;
-
-        __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
-        __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
-
-        for (uint32_t number = 0; number < quarterPoints; number++) {
-            currentValues = _mm_loadu_ps(inputPtr);
-            inputPtr += 4;
-            currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
-            compareResults = _mm_cmplt_ps(currentValues, minValues);
-            minValuesIndex =
-                _mm_blendv_ps(minValuesIndex, currentIndexes, compareResults);
-            minValues = _mm_blendv_ps(minValues, currentValues, compareResults);
-        }
+    const uint32_t quarterPoints = num_points / 4;
+
+    float* inputPtr = (float*)source;
+
+    __m128 indexIncrementValues = _mm_set1_ps(4);
+    __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
+
+    float min = source[0];
+    float index = 0;
+    __m128 minValues = _mm_set1_ps(min);
+    __m128 minValuesIndex = _mm_setzero_ps();
+    __m128 compareResults;
+    __m128 currentValues;
+
+    __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
+
+    for (uint32_t number = 0; number < quarterPoints; number++) {
+        currentValues = _mm_loadu_ps(inputPtr);
+        inputPtr += 4;
+        currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
+        compareResults = _mm_cmplt_ps(currentValues, minValues);
+        minValuesIndex =
+            _mm_blendv_ps(minValuesIndex, currentIndexes, compareResults);
+        minValues = _mm_blendv_ps(minValues, currentValues, compareResults);
+    }
  
-        // Calculate the smallest value from the remaining 4 points
-        _mm_store_ps(minValuesBuffer, minValues);
-        _mm_store_ps(minIndexesBuffer, minValuesIndex);
+    // Calculate the smallest value from the remaining 4 points
+    _mm_store_ps(minValuesBuffer, minValues);
+    _mm_store_ps(minIndexesBuffer, minValuesIndex);
  
-        for (uint32_t number = 0; number < 4; number++) {
-            if (minValuesBuffer[number] < min) {
+    for (uint32_t number = 0; number < 4; number++) {
+        if (minValuesBuffer[number] < min) {
+            index = minIndexesBuffer[number];
+            min = minValuesBuffer[number];
+        } else if (minValuesBuffer[number] == min) {
+            if (index > minIndexesBuffer[number])
                  index = minIndexesBuffer[number];
-                min = minValuesBuffer[number];
-            } else if (minValuesBuffer[number] == min) {
-                if (index > minIndexesBuffer[number])
-                    index = minIndexesBuffer[number];
-            }
          }
+    }
  
-        for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
-            if (source[number] < min) {
-                index = number;
-                min = source[number];
-            }
+    for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
+        if (source[number] < min) {
+            index = number;
+            min = source[number];
          }
-        target[0] = (uint32_t)index;
      }
+    target[0] = (uint32_t)index;
  }
  
  #endif /*LV_HAVE_SSE4_1*/
@@ -486,57 +472,55 @@ static inline void volk_32f_index_min_32u_u_sse4_1(uint32_t* target,
  static inline void
  volk_32f_index_min_32u_u_sse(uint32_t* target, const float* source, uint32_t num_points)
  {
-    if (num_points > 0) {
-        const uint32_t quarterPoints = num_points / 4;
-
-        float* inputPtr = (float*)source;
-
-        __m128 indexIncrementValues = _mm_set1_ps(4);
-        __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
-
-        float min = source[0];
-        float index = 0;
-        __m128 minValues = _mm_set1_ps(min);
-        __m128 minValuesIndex = _mm_setzero_ps();
-        __m128 compareResults;
-        __m128 currentValues;
-
-        __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
-        __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
-
-        for (uint32_t number = 0; number < quarterPoints; number++) {
-            currentValues = _mm_loadu_ps(inputPtr);
-            inputPtr += 4;
-            currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
-            compareResults = _mm_cmplt_ps(currentValues, minValues);
-            minValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, currentIndexes),
-                                       _mm_andnot_ps(compareResults, minValuesIndex));
-            minValues = _mm_or_ps(_mm_and_ps(compareResults, currentValues),
-                                  _mm_andnot_ps(compareResults, minValues));
-        }
+    const uint32_t quarterPoints = num_points / 4;
+
+    float* inputPtr = (float*)source;
+
+    __m128 indexIncrementValues = _mm_set1_ps(4);
+    __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
+
+    float min = source[0];
+    float index = 0;
+    __m128 minValues = _mm_set1_ps(min);
+    __m128 minValuesIndex = _mm_setzero_ps();
+    __m128 compareResults;
+    __m128 currentValues;
+
+    __VOLK_ATTR_ALIGNED(16) float minValuesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float minIndexesBuffer[4];
+
+    for (uint32_t number = 0; number < quarterPoints; number++) {
+        currentValues = _mm_loadu_ps(inputPtr);
+        inputPtr += 4;
+        currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
+        compareResults = _mm_cmplt_ps(currentValues, minValues);
+        minValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, currentIndexes),
+                                   _mm_andnot_ps(compareResults, minValuesIndex));
+        minValues = _mm_or_ps(_mm_and_ps(compareResults, currentValues),
+                              _mm_andnot_ps(compareResults, minValues));
+    }
  
-        // Calculate the smallest value from the remaining 4 points
-        _mm_store_ps(minValuesBuffer, minValues);
-        _mm_store_ps(minIndexesBuffer, minValuesIndex);
+    // Calculate the smallest value from the remaining 4 points
+    _mm_store_ps(minValuesBuffer, minValues);
+    _mm_store_ps(minIndexesBuffer, minValuesIndex);
  
-        for (uint32_t number = 0; number < 4; number++) {
-            if (minValuesBuffer[number] < min) {
+    for (uint32_t number = 0; number < 4; number++) {
+        if (minValuesBuffer[number] < min) {
+            index = minIndexesBuffer[number];
+            min = minValuesBuffer[number];
+        } else if (minValuesBuffer[number] == min) {
+            if (index > minIndexesBuffer[number])
                  index = minIndexesBuffer[number];
-                min = minValuesBuffer[number];
-            } else if (minValuesBuffer[number] == min) {
-                if (index > minIndexesBuffer[number])
-                    index = minIndexesBuffer[number];
-            }
          }
+    }
  
-        for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
-            if (source[number] < min) {
-                index = number;
-                min = source[number];
-            }
+    for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
+        if (source[number] < min) {
+            index = number;
+            min = source[number];
          }
-        target[0] = (uint32_t)index;
      }
+    target[0] = (uint32_t)index;
  }
  
  #endif /*LV_HAVE_SSE*/
diff --git a/kernels/volk/volk_32fc_index_min_16u.h b/kernels/volk/volk_32fc_index_min_16u.h

index 8f40730c76c0b8a35fdcf5223c295b7579c3ce54..6ddd8a3240391bb19134bd5bc7168fa927b19a26 100644 (file)
--- a/kernels/volk/volk_32fc_index_min_16u.h
+++ b/kernels/volk/volk_32fc_index_min_16u.h
@@ -2,14 +2,14 @@
  /*
   * Copyright 2021 Free Software Foundation, Inc.
   *
- * This file is part of GNU Radio
+ * This file is part of VOLK
   *
- * GNU Radio is free software; you can redistribute it and/or modify
+ * VOLK is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 3, or (at your option)
   * any later version.
   *
- * GNU Radio is distributed in the hope that it will be useful,
+ * VOLK is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
@@ -210,7 +210,6 @@ static inline void
  volk_32fc_index_min_16u_a_sse3(uint16_t* target, lv_32fc_t* source, uint32_t num_points)
  {
      num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
-    const uint32_t num_bytes = num_points * 8;
  
      union bit128 holderf;
      union bit128 holderi;
@@ -230,7 +229,7 @@ volk_32fc_index_min_16u_a_sse3(uint16_t* target, lv_32fc_t* source, uint32_t num
      xmm10 = _mm_setr_epi32(4, 4, 4, 4);
      xmm3 = _mm_set_ps1(FLT_MAX);
  
-    int bound = num_bytes >> 5;
+    int bound = num_points >> 2;
  
      for (int i = 0; i < bound; ++i) {
          xmm1 = _mm_load_ps((float*)source);
@@ -256,7 +255,7 @@ volk_32fc_index_min_16u_a_sse3(uint16_t* target, lv_32fc_t* source, uint32_t num
          xmm8 = _mm_add_epi32(xmm8, xmm10);
      }
  
-    if (num_bytes >> 4 & 1) {
+    if (num_points >> 1 & 1) {
          xmm2 = _mm_load_ps((float*)source);
  
          xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, bit128_p(&xmm8)->float_vec);
@@ -283,7 +282,7 @@ volk_32fc_index_min_16u_a_sse3(uint16_t* target, lv_32fc_t* source, uint32_t num
          xmm8 = _mm_add_epi32(xmm8, xmm10);
      }
  
-    if (num_bytes >> 3 & 1) {
+    if (num_points & 1) {
          sq_dist = lv_creal(source[0]) * lv_creal(source[0]) +
                    lv_cimag(source[0]) * lv_cimag(source[0]);
  
@@ -324,13 +323,12 @@ static inline void
  volk_32fc_index_min_16u_generic(uint16_t* target, lv_32fc_t* source, uint32_t num_points)
  {
      num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
-    const uint32_t num_bytes = num_points * 8;
  
      float sq_dist = 0.0;
      float min = FLT_MAX;
      uint16_t index = 0;
  
-    for (uint32_t i = 0; i<num_bytes>> 3; ++i) {
+    for (uint32_t i = 0; i < num_points; ++i) {
          sq_dist = lv_creal(source[i]) * lv_creal(source[i]) +
                    lv_cimag(source[i]) * lv_cimag(source[i]);
  
diff --git a/kernels/volk/volk_32fc_index_min_32u.h b/kernels/volk/volk_32fc_index_min_32u.h

index efa33ee9318809903239ca013fe246fcf84c367d..d5e2a00ea0429c0e8adf437f8f7525a325aa6136 100644 (file)
--- a/kernels/volk/volk_32fc_index_min_32u.h
+++ b/kernels/volk/volk_32fc_index_min_32u.h
@@ -2,14 +2,14 @@
  /*
   * Copyright 2021 Free Software Foundation, Inc.
   *
- * This file is part of GNU Radio
+ * This file is part of VOLK
   *
- * GNU Radio is free software; you can redistribute it and/or modify
+ * VOLK is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 3, or (at your option)
   * any later version.
   *
- * GNU Radio is distributed in the hope that it will be useful,
+ * VOLK is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
@@ -198,8 +198,6 @@ static inline void volk_32fc_index_min_32u_a_avx2_variant_1(uint32_t* target,
  static inline void
  volk_32fc_index_min_32u_a_sse3(uint32_t* target, lv_32fc_t* source, uint32_t num_points)
  {
-    const uint32_t num_bytes = num_points * 8;
-
      union bit128 holderf;
      union bit128 holderi;
      float sq_dist = 0.0;
@@ -218,7 +216,7 @@ volk_32fc_index_min_32u_a_sse3(uint32_t* target, lv_32fc_t* source, uint32_t num
      xmm10 = _mm_setr_epi32(4, 4, 4, 4);
      xmm3 = _mm_set_ps1(FLT_MAX);
  
-    int bound = num_bytes >> 5;
+    int bound = num_points >> 2;
  
      for (int i = 0; i < bound; ++i) {
          xmm1 = _mm_load_ps((float*)source);
@@ -244,7 +242,7 @@ volk_32fc_index_min_32u_a_sse3(uint32_t* target, lv_32fc_t* source, uint32_t num
          xmm8 = _mm_add_epi32(xmm8, xmm10);
      }
  
-    if (num_bytes >> 4 & 1) {
+    if (num_points >> 1 & 1) {
          xmm2 = _mm_load_ps((float*)source);
  
          xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, bit128_p(&xmm8)->float_vec);
@@ -271,7 +269,7 @@ volk_32fc_index_min_32u_a_sse3(uint32_t* target, lv_32fc_t* source, uint32_t num
          xmm8 = _mm_add_epi32(xmm8, xmm10);
      }
  
-    if (num_bytes >> 3 & 1) {
+    if (num_points & 1) {
          sq_dist = lv_creal(source[0]) * lv_creal(source[0]) +
                    lv_cimag(source[0]) * lv_cimag(source[0]);
  
@@ -311,13 +309,11 @@ volk_32fc_index_min_32u_a_sse3(uint32_t* target, lv_32fc_t* source, uint32_t num
  static inline void
  volk_32fc_index_min_32u_generic(uint32_t* target, lv_32fc_t* source, uint32_t num_points)
  {
-    const uint32_t num_bytes = num_points * 8;
-
      float sq_dist = 0.0;
      float min = FLT_MAX;
      uint32_t index = 0;
  
-    for (uint32_t i = 0; i<num_bytes>> 3; ++i) {
+    for (uint32_t i = 0; i < num_points; ++i) {
          sq_dist = lv_creal(source[i]) * lv_creal(source[i]) +
                    lv_cimag(source[i]) * lv_cimag(source[i]);
author	Zlika <zlika_ese@hotmail.com>
	Mon, 5 Jul 2021 11:05:18 +0000 (13:05 +0200)
committer	A. Maitland Bottoms <bottoms@debian.org>
	Fri, 22 Oct 2021 03:30:05 +0000 (04:30 +0100)
kernels/volk/volk_32f_index_min_16u.h		patch \| blob \| history
kernels/volk/volk_32f_index_min_32u.h		patch \| blob \| history
kernels/volk/volk_32fc_index_min_16u.h		patch \| blob \| history
kernels/volk/volk_32fc_index_min_32u.h		patch \| blob \| history