From 781e0d7bd2748e76cab5a22d1109cbcf96a1a608 Mon Sep 17 00:00:00 2001 From: "A. Maitland Bottoms" Date: Sun, 4 Sep 2022 17:00:56 +0100 Subject: [PATCH 1/1] Import volk_2.5.2.orig.tar.xz [dgit import orig volk_2.5.2.orig.tar.xz] --- .clang-format | 106 + .github/workflows/android_build.yml | 49 + .github/workflows/check-pr-formatting.yml | 23 + .github/workflows/run-tests.yml | 245 + .gitignore | 5 + .gitlab-ci.yml | 30 + .gitmodules | 3 + .lastrelease | 1 + .travis.yml | 67 + .zenodo.json | 261 + CMakeLists.txt | 404 + COPYING | 684 ++ COPYING-GPL | 1 + COPYING-LGPL | 175 + README.md | 122 + apps/CMakeLists.txt | 91 + apps/plot_best_vs_generic.py | 44 + apps/volk-config-info.cc | 73 + apps/volk_option_helpers.cc | 241 + apps/volk_option_helpers.h | 83 + apps/volk_profile.cc | 335 + apps/volk_profile.h | 23 + appveyor.yml | 38 + cmake/Modules/CMakeParseArgumentsCopy.cmake | 145 + cmake/Modules/FindFILESYSTEM.cmake | 261 + cmake/Modules/FindORC.cmake | 49 + cmake/Modules/VolkAddTest.cmake | 208 + cmake/Modules/VolkBuildTypes.cmake | 199 + cmake/Modules/VolkConfig.cmake.in | 41 + cmake/Modules/VolkConfigVersion.cmake.in | 21 + cmake/Modules/VolkPython.cmake | 264 + cmake/Modules/VolkVersion.cmake | 73 + cmake/Toolchains/aarch64-linux-gnu.cmake | 33 + cmake/Toolchains/arm-linux-gnueabihf.cmake | 36 + .../arm_cortex_a15_hardfp_native.cmake | 9 + .../arm_cortex_a53_hardfp_native.cmake | 9 + .../arm_cortex_a72_hardfp_native.cmake | 9 + .../arm_cortex_a8_hardfp_native.cmake | 9 + .../arm_cortex_a8_softfp_native.cmake | 9 + .../arm_cortex_a9_hardfp_native.cmake | 9 + cmake/Toolchains/intel-sde.cmake | 3 + cmake/Toolchains/oe-sdk_cross.cmake | 15 + cmake/cmake_uninstall.cmake.in | 39 + cmake/msvc/config.h | 58 + cmake/msvc/sys/time.h | 71 + cpu_features/.clang-format | 4 + cpu_features/.dockerignore | 31 + cpu_features/.github/workflows/Dockerfile | 5 + .../.github/workflows/aarch64_linux.yml | 28 + .../.github/workflows/amd64_freebsd.yml | 22 + .../.github/workflows/amd64_linux.yml | 31 + .../.github/workflows/amd64_macos.yml | 43 + .../.github/workflows/amd64_windows.yml | 25 + cpu_features/.github/workflows/arm_linux.yml | 31 + .../.github/workflows/clang_format.yml | 24 + cpu_features/.github/workflows/mips_linux.yml | 30 + cpu_features/.gitignore | 13 + cpu_features/CMakeLists.txt | 249 + cpu_features/CONTRIBUTING.md | 23 + cpu_features/LICENSE | 230 + cpu_features/README.md | 228 + cpu_features/ci/Makefile | 240 + cpu_features/ci/README.md | 66 + cpu_features/ci/doc/docker.dot | 64 + cpu_features/ci/doc/docker.svg | 312 + cpu_features/ci/doc/generate_image.sh | 7 + cpu_features/ci/docker/amd64/Dockerfile | 48 + cpu_features/ci/docker/toolchain/Dockerfile | 34 + cpu_features/ci/sample/CMakeLists.txt | 22 + cpu_features/ci/sample/main.cpp | 11 + cpu_features/ci/vagrant/freebsd/Vagrantfile | 102 + cpu_features/cmake/CpuFeaturesConfig.cmake.in | 3 + .../cmake/CpuFeaturesNdkCompatConfig.cmake.in | 3 + cpu_features/cmake/README.md | 30 + .../cmake/googletest.CMakeLists.txt.in | 15 + .../include/cpu_features_cache_info.h | 54 + cpu_features/include/cpu_features_macros.h | 242 + cpu_features/include/cpuinfo_aarch64.h | 158 + cpu_features/include/cpuinfo_arm.h | 121 + cpu_features/include/cpuinfo_mips.h | 60 + cpu_features/include/cpuinfo_ppc.h | 149 + cpu_features/include/cpuinfo_x86.h | 255 + cpu_features/include/internal/bit_utils.h | 40 + cpu_features/include/internal/cpuid_x86.h | 37 + cpu_features/include/internal/filesystem.h | 39 + cpu_features/include/internal/hwcaps.h | 190 + .../include/internal/stack_line_reader.h | 49 + cpu_features/include/internal/string_view.h | 110 + cpu_features/ndk_compat/CMakeLists.txt | 60 + cpu_features/ndk_compat/README.md | 4 + cpu_features/ndk_compat/cpu-features.c | 205 + cpu_features/ndk_compat/cpu-features.h | 320 + cpu_features/ndk_compat/ndk-compat-test.c | 12 + cpu_features/scripts/make_release.sh | 72 + cpu_features/scripts/run_integration.sh | 363 + cpu_features/scripts/test_integration.sh | 79 + cpu_features/src/copy.inl | 19 + cpu_features/src/define_introspection.inl | 86 + .../src/define_introspection_and_hwcaps.inl | 26 + cpu_features/src/equals.inl | 22 + cpu_features/src/filesystem.c | 62 + cpu_features/src/hwcaps.c | 175 + .../src/impl_aarch64_linux_or_android.c | 150 + cpu_features/src/impl_arm_linux_or_android.c | 211 + cpu_features/src/impl_mips_linux_or_android.c | 88 + cpu_features/src/impl_ppc_linux.c | 162 + .../src/impl_x86__base_implementation.inl | 1724 ++++ cpu_features/src/impl_x86_freebsd.c | 67 + cpu_features/src/impl_x86_linux_or_android.c | 57 + cpu_features/src/impl_x86_macos.c | 56 + cpu_features/src/impl_x86_windows.c | 49 + cpu_features/src/stack_line_reader.c | 132 + cpu_features/src/string_view.c | 192 + cpu_features/src/utils/list_cpu_features.c | 439 + cpu_features/test/CMakeLists.txt | 91 + cpu_features/test/bit_utils_test.cc | 53 + cpu_features/test/cpuinfo_aarch64_test.cc | 174 + cpu_features/test/cpuinfo_arm_test.cc | 354 + cpu_features/test/cpuinfo_mips_test.cc | 126 + cpu_features/test/cpuinfo_ppc_test.cc | 119 + cpu_features/test/cpuinfo_x86_test.cc | 1099 ++ cpu_features/test/filesystem_for_testing.cc | 103 + cpu_features/test/filesystem_for_testing.h | 61 + cpu_features/test/hwcaps_for_testing.cc | 52 + cpu_features/test/hwcaps_for_testing.h | 31 + cpu_features/test/stack_line_reader_test.cc | 132 + cpu_features/test/string_view_test.cc | 202 + ...AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md | 112 + docs/CHANGELOG.md | 656 ++ docs/CMakeLists.txt | 24 + docs/CODE_OF_CONDUCT.md | 127 + docs/CONTRIBUTING.md | 78 + docs/DCO.txt | 37 + docs/Doxyfile.in | 2364 +++++ docs/DoxygenLayout.xml | 196 + docs/extending_volk.dox | 87 + docs/kernels.dox | 128 + docs/main_page.dox | 17 + docs/terms_and_techniques.dox | 112 + docs/using_volk.dox | 26 + docs/versioning.md | 279 + docs/volk_logo.png | Bin 0 -> 48675 bytes docs/volk_logo_small.png | Bin 0 -> 15496 bytes gen/archs.xml | 176 + gen/machines.xml | 64 + gen/volk_arch_defs.py | 76 + gen/volk_compile_utils.py | 54 + gen/volk_kernel_defs.py | 197 + gen/volk_machine_defs.py | 63 + gen/volk_tmpl_utils.py | 49 + include/volk/constants.h | 25 + include/volk/saturation_arithmetic.h | 40 + include/volk/sse2neon.h | 8864 +++++++++++++++++ include/volk/volk_alloc.hh | 77 + include/volk/volk_avx2_intrinsics.h | 333 + include/volk/volk_avx_intrinsics.h | 195 + include/volk/volk_common.h | 171 + include/volk/volk_complex.h | 106 + include/volk/volk_malloc.h | 62 + include/volk/volk_neon_intrinsics.h | 284 + include/volk/volk_prefs.h | 39 + include/volk/volk_sse3_intrinsics.h | 67 + include/volk/volk_sse_intrinsics.h | 59 + include/volk/volk_version.h.in | 37 + kernels/README.md | 68 + .../neon/volk_16i_max_star_horizontal_16i.s | 51 + .../volk_32f_s32f_multiply_32f_a_neonasm.s | 52 + .../asm/neon/volk_32f_x2_add_32f_a_neonasm.s | 54 + .../neon/volk_32f_x2_add_32f_a_neonpipeline.s | 65 + .../neon/volk_32f_x2_dot_prod_32f_a_neonasm.s | 58 + .../volk_32f_x2_dot_prod_32f_a_neonasm_opts.s | 116 + .../volk_32fc_32f_dot_prod_32fc_a_neonasm.s | 79 + ...olk_32fc_32f_dot_prod_32fc_a_neonasmvmla.s | 74 + ...lk_32fc_32f_dot_prod_32fc_a_neonpipeline.s | 97 + .../volk_32fc_32f_dot_prod_32fc_a_unrollasm.s | 146 + .../volk_32fc_x2_dot_prod_32fc_a_neonasm.s | 98 + ...32fc_x2_dot_prod_32fc_a_neonasm_opttests.s | 96 + .../volk_32fc_x2_multiply_32fc_a_neonasm.s | 47 + ...lk_16ic_deinterleave_16i_x2_a_orc_impl.orc | 5 + ...k_16ic_deinterleave_real_8i_a_orc_impl.orc | 6 + .../volk_16ic_magnitude_16i_a_orc_impl.orc | 23 + ...ic_s32f_deinterleave_32f_x2_a_orc_impl.orc | 12 + ..._16sc_magnitude_32f_aligned16_orc_impl.orc | 25 + .../asm/orc/volk_16u_byteswap_a_orc_impl.orc | 3 + .../orc/volk_32f_s32f_add_32f_a_orc_impl.orc | 5 + .../volk_32f_s32f_multiply_32f_a_orc_impl.orc | 5 + .../volk_32f_s32f_normalize_a_orc_impl.orc | 5 + .../asm/orc/volk_32f_sqrt_32f_a_orc_impl.orc | 4 + .../orc/volk_32f_x2_add_32f_a_orc_impl.orc | 5 + .../orc/volk_32f_x2_divide_32f_a_orc_impl.orc | 5 + .../volk_32f_x2_dot_prod_32f_a_orc_impl.orc | 6 + .../orc/volk_32f_x2_max_32f_a_orc_impl.orc | 5 + .../orc/volk_32f_x2_min_32f_a_orc_impl.orc | 5 + .../volk_32f_x2_multiply_32f_a_orc_impl.orc | 5 + .../volk_32f_x2_subtract_32f_a_orc_impl.orc | 5 + ...volk_32fc_32f_multiply_32fc_a_orc_impl.orc | 7 + .../volk_32fc_magnitude_32f_a_orc_impl.orc | 13 + ...lk_32fc_s32fc_multiply_32fc_a_orc_impl.orc | 18 + .../volk_32fc_x2_multiply_32fc_a_orc_impl.orc | 18 + .../orc/volk_32i_x2_and_32i_a_orc_impl.orc | 5 + .../asm/orc/volk_32i_x2_or_32i_a_orc_impl.orc | 5 + .../orc/volk_8i_convert_16i_a_orc_impl.orc | 6 + .../volk_8i_s32f_convert_32f_a_orc_impl.orc | 11 + kernels/volk/volk_16i_32fc_dot_prod_32fc.h | 692 ++ kernels/volk/volk_16i_branch_4_state_8.h | 204 + kernels/volk/volk_16i_convert_8i.h | 295 + kernels/volk/volk_16i_max_star_16i.h | 172 + .../volk/volk_16i_max_star_horizontal_16i.h | 216 + .../volk/volk_16i_permute_and_scalar_add.h | 190 + kernels/volk/volk_16i_s32f_convert_32f.h | 503 + kernels/volk/volk_16i_x4_quad_max_star_16i.h | 288 + kernels/volk/volk_16i_x5_add_quad_16i_x4.h | 251 + kernels/volk/volk_16ic_convert_32fc.h | 318 + kernels/volk/volk_16ic_deinterleave_16i_x2.h | 378 + .../volk/volk_16ic_deinterleave_real_16i.h | 380 + kernels/volk/volk_16ic_deinterleave_real_8i.h | 418 + kernels/volk/volk_16ic_magnitude_16i.h | 429 + .../volk/volk_16ic_s32f_deinterleave_32f_x2.h | 330 + .../volk_16ic_s32f_deinterleave_real_32f.h | 337 + kernels/volk/volk_16ic_s32f_magnitude_32f.h | 349 + kernels/volk/volk_16ic_x2_dot_prod_16ic.h | 693 ++ kernels/volk/volk_16ic_x2_multiply_16ic.h | 465 + kernels/volk/volk_16u_byteswap.h | 299 + kernels/volk/volk_16u_byteswappuppet_16u.h | 95 + kernels/volk/volk_32f_64f_add_64f.h | 233 + kernels/volk/volk_32f_64f_multiply_64f.h | 192 + kernels/volk/volk_32f_8u_polarbutterfly_32f.h | 394 + .../volk_32f_8u_polarbutterflypuppet_32f.h | 148 + kernels/volk/volk_32f_accumulator_s32f.h | 235 + kernels/volk/volk_32f_acos_32f.h | 504 + kernels/volk/volk_32f_asin_32f.h | 489 + kernels/volk/volk_32f_atan_32f.h | 470 + kernels/volk/volk_32f_binary_slicer_32i.h | 265 + kernels/volk/volk_32f_binary_slicer_8i.h | 504 + kernels/volk/volk_32f_convert_64f.h | 251 + kernels/volk/volk_32f_cos_32f.h | 999 ++ kernels/volk/volk_32f_exp_32f.h | 285 + kernels/volk/volk_32f_expfast_32f.h | 304 + kernels/volk/volk_32f_index_max_16u.h | 362 + kernels/volk/volk_32f_index_max_32u.h | 545 + kernels/volk/volk_32f_index_min_16u.h | 349 + kernels/volk/volk_32f_index_min_32u.h | 511 + kernels/volk/volk_32f_invsqrt_32f.h | 206 + kernels/volk/volk_32f_log2_32f.h | 740 ++ kernels/volk/volk_32f_null_32f.h | 32 + .../volk/volk_32f_s32f_32f_fm_detect_32f.h | 338 + kernels/volk/volk_32f_s32f_add_32f.h | 261 + ...k_32f_s32f_calc_spectral_noise_floor_32f.h | 461 + kernels/volk/volk_32f_s32f_convert_16i.h | 581 ++ kernels/volk/volk_32f_s32f_convert_32i.h | 421 + kernels/volk/volk_32f_s32f_convert_8i.h | 504 + .../volk/volk_32f_s32f_mod_rangepuppet_32f.h | 89 + kernels/volk/volk_32f_s32f_multiply_32f.h | 291 + kernels/volk/volk_32f_s32f_normalize.h | 207 + kernels/volk/volk_32f_s32f_power_32f.h | 200 + .../volk/volk_32f_s32f_s32f_mod_range_32f.h | 363 + kernels/volk/volk_32f_s32f_stddev_32f.h | 347 + kernels/volk/volk_32f_sin_32f.h | 897 ++ kernels/volk/volk_32f_sqrt_32f.h | 221 + .../volk/volk_32f_stddev_and_mean_32f_x2.h | 572 ++ kernels/volk/volk_32f_tan_32f.h | 754 ++ kernels/volk/volk_32f_tanh_32f.h | 415 + kernels/volk/volk_32f_x2_add_32f.h | 413 + kernels/volk/volk_32f_x2_divide_32f.h | 350 + kernels/volk/volk_32f_x2_dot_prod_16i.h | 682 ++ kernels/volk/volk_32f_x2_dot_prod_32f.h | 976 ++ .../volk/volk_32f_x2_fm_detectpuppet_32f.h | 82 + kernels/volk/volk_32f_x2_interleave_32fc.h | 258 + kernels/volk/volk_32f_x2_max_32f.h | 333 + kernels/volk/volk_32f_x2_min_32f.h | 337 + kernels/volk/volk_32f_x2_multiply_32f.h | 379 + kernels/volk/volk_32f_x2_pow_32f.h | 977 ++ .../volk/volk_32f_x2_s32f_interleave_16ic.h | 330 + kernels/volk/volk_32f_x2_subtract_32f.h | 328 + kernels/volk/volk_32f_x3_sum_of_poly_32f.h | 657 ++ kernels/volk/volk_32fc_32f_add_32fc.h | 234 + kernels/volk/volk_32fc_32f_dot_prod_32fc.h | 774 ++ kernels/volk/volk_32fc_32f_multiply_32fc.h | 228 + kernels/volk/volk_32fc_accumulator_s32fc.h | 279 + kernels/volk/volk_32fc_conjugate_32fc.h | 280 + kernels/volk/volk_32fc_convert_16ic.h | 419 + kernels/volk/volk_32fc_deinterleave_32f_x2.h | 257 + kernels/volk/volk_32fc_deinterleave_64f_x2.h | 336 + .../volk/volk_32fc_deinterleave_imag_32f.h | 232 + .../volk/volk_32fc_deinterleave_real_32f.h | 237 + .../volk/volk_32fc_deinterleave_real_64f.h | 243 + kernels/volk/volk_32fc_index_max_16u.h | 469 + kernels/volk/volk_32fc_index_max_32u.h | 512 + kernels/volk/volk_32fc_index_min_16u.h | 465 + kernels/volk/volk_32fc_index_min_32u.h | 507 + kernels/volk/volk_32fc_magnitude_32f.h | 457 + .../volk/volk_32fc_magnitude_squared_32f.h | 370 + kernels/volk/volk_32fc_s32f_atan2_32f.h | 208 + .../volk_32fc_s32f_deinterleave_real_16i.h | 258 + kernels/volk/volk_32fc_s32f_magnitude_16i.h | 309 + kernels/volk/volk_32fc_s32f_power_32fc.h | 159 + ...fc_s32f_power_spectral_densitypuppet_32f.h | 63 + .../volk/volk_32fc_s32f_power_spectrum_32f.h | 225 + ..._32fc_s32f_x2_power_spectral_density_32f.h | 247 + kernels/volk/volk_32fc_s32fc_multiply_32fc.h | 443 + .../volk/volk_32fc_s32fc_rotatorpuppet_32fc.h | 169 + .../volk/volk_32fc_s32fc_x2_rotator_32fc.h | 780 ++ kernels/volk/volk_32fc_x2_add_32fc.h | 277 + .../volk_32fc_x2_conjugate_dot_prod_32fc.h | 743 ++ kernels/volk/volk_32fc_x2_divide_32fc.h | 380 + kernels/volk/volk_32fc_x2_dot_prod_32fc.h | 1326 +++ kernels/volk/volk_32fc_x2_multiply_32fc.h | 482 + .../volk_32fc_x2_multiply_conjugate_32fc.h | 311 + ...32fc_x2_s32f_square_dist_scalar_mult_32f.h | 538 + ...2fc_x2_s32fc_multiply_conjugate_add_32fc.h | 344 + kernels/volk/volk_32fc_x2_square_dist_32f.h | 393 + kernels/volk/volk_32i_s32f_convert_32f.h | 336 + kernels/volk/volk_32i_x2_and_32i.h | 341 + kernels/volk/volk_32i_x2_or_32i.h | 340 + kernels/volk/volk_32u_byteswap.h | 367 + kernels/volk/volk_32u_byteswappuppet_32u.h | 94 + kernels/volk/volk_32u_popcnt.h | 79 + kernels/volk/volk_32u_popcntpuppet_32u.h | 40 + kernels/volk/volk_32u_reverse_32u.h | 407 + kernels/volk/volk_64f_convert_32f.h | 336 + kernels/volk/volk_64f_x2_add_64f.h | 247 + kernels/volk/volk_64f_x2_max_64f.h | 294 + kernels/volk/volk_64f_x2_min_64f.h | 294 + kernels/volk/volk_64f_x2_multiply_64f.h | 247 + kernels/volk/volk_64u_byteswap.h | 501 + kernels/volk/volk_64u_byteswappuppet_64u.h | 117 + kernels/volk/volk_64u_popcnt.h | 120 + kernels/volk/volk_64u_popcntpuppet_64u.h | 56 + kernels/volk/volk_8i_convert_16i.h | 287 + kernels/volk/volk_8i_s32f_convert_32f.h | 373 + kernels/volk/volk_8ic_deinterleave_16i_x2.h | 395 + kernels/volk/volk_8ic_deinterleave_real_16i.h | 303 + kernels/volk/volk_8ic_deinterleave_real_8i.h | 405 + .../volk/volk_8ic_s32f_deinterleave_32f_x2.h | 444 + .../volk_8ic_s32f_deinterleave_real_32f.h | 353 + .../volk_8ic_x2_multiply_conjugate_16ic.h | 276 + ...volk_8ic_x2_s32f_multiply_conjugate_32fc.h | 344 + kernels/volk/volk_8u_conv_k7_r2puppet_8u.h | 424 + kernels/volk/volk_8u_x2_encodeframepolar_8u.h | 1137 +++ kernels/volk/volk_8u_x3_encodepolar_8u_x2.h | 172 + .../volk/volk_8u_x3_encodepolarpuppet_8u.h | 140 + kernels/volk/volk_8u_x4_conv_k7_r2_8u.h | 950 ++ lib/CMakeLists.txt | 634 ++ lib/constants.c.in | 47 + lib/kernel_tests.h | 188 + lib/qa_utils.cc | 887 ++ lib/qa_utils.h | 209 + lib/testqa.cc | 145 + lib/volk_malloc.c | 87 + lib/volk_prefs.c | 107 + lib/volk_rank_archs.c | 94 + lib/volk_rank_archs.h | 36 + python/volk_modtool/CMakeLists.txt | 28 + python/volk_modtool/README | 113 + python/volk_modtool/__init__.py | 12 + python/volk_modtool/cfg.py | 84 + python/volk_modtool/volk_modtool | 116 + python/volk_modtool/volk_modtool_generate.py | 302 + scripts/ci/download_intel_sde.sh | 38 + scripts/licensing/count_contrib.sh | 56 + scripts/tools/release.sh | 146 + scripts/tools/run_citations_update.py | 148 + scripts/tools/update_citations.sh | 18 + tmpl/volk.pc.in | 13 + tmpl/volk.tmpl.c | 198 + tmpl/volk.tmpl.h | 111 + tmpl/volk_config_fixed.tmpl.h | 17 + tmpl/volk_cpu.tmpl.c | 96 + tmpl/volk_cpu.tmpl.h | 30 + tmpl/volk_machine_xxx.tmpl.c | 49 + tmpl/volk_machines.tmpl.c | 22 + tmpl/volk_machines.tmpl.h | 43 + tmpl/volk_typedefs.tmpl.h | 20 + 373 files changed, 88786 insertions(+) create mode 100644 .clang-format create mode 100644 .github/workflows/android_build.yml create mode 100644 .github/workflows/check-pr-formatting.yml create mode 100644 .github/workflows/run-tests.yml create mode 100644 .gitignore create mode 100644 .gitlab-ci.yml create mode 100644 .gitmodules create mode 100644 .lastrelease create mode 100644 .travis.yml create mode 100644 .zenodo.json create mode 100644 CMakeLists.txt create mode 100644 COPYING create mode 120000 COPYING-GPL create mode 100644 COPYING-LGPL create mode 100644 README.md create mode 100644 apps/CMakeLists.txt create mode 100755 apps/plot_best_vs_generic.py create mode 100644 apps/volk-config-info.cc create mode 100644 apps/volk_option_helpers.cc create mode 100644 apps/volk_option_helpers.h create mode 100644 apps/volk_profile.cc create mode 100644 apps/volk_profile.h create mode 100644 appveyor.yml create mode 100644 cmake/Modules/CMakeParseArgumentsCopy.cmake create mode 100644 cmake/Modules/FindFILESYSTEM.cmake create mode 100644 cmake/Modules/FindORC.cmake create mode 100644 cmake/Modules/VolkAddTest.cmake create mode 100644 cmake/Modules/VolkBuildTypes.cmake create mode 100644 cmake/Modules/VolkConfig.cmake.in create mode 100644 cmake/Modules/VolkConfigVersion.cmake.in create mode 100644 cmake/Modules/VolkPython.cmake create mode 100644 cmake/Modules/VolkVersion.cmake create mode 100644 cmake/Toolchains/aarch64-linux-gnu.cmake create mode 100644 cmake/Toolchains/arm-linux-gnueabihf.cmake create mode 100644 cmake/Toolchains/arm_cortex_a15_hardfp_native.cmake create mode 100644 cmake/Toolchains/arm_cortex_a53_hardfp_native.cmake create mode 100644 cmake/Toolchains/arm_cortex_a72_hardfp_native.cmake create mode 100644 cmake/Toolchains/arm_cortex_a8_hardfp_native.cmake create mode 100644 cmake/Toolchains/arm_cortex_a8_softfp_native.cmake create mode 100644 cmake/Toolchains/arm_cortex_a9_hardfp_native.cmake create mode 100644 cmake/Toolchains/intel-sde.cmake create mode 100644 cmake/Toolchains/oe-sdk_cross.cmake create mode 100644 cmake/cmake_uninstall.cmake.in create mode 100644 cmake/msvc/config.h create mode 100644 cmake/msvc/sys/time.h create mode 100644 cpu_features/.clang-format create mode 100644 cpu_features/.dockerignore create mode 100644 cpu_features/.github/workflows/Dockerfile create mode 100644 cpu_features/.github/workflows/aarch64_linux.yml create mode 100644 cpu_features/.github/workflows/amd64_freebsd.yml create mode 100644 cpu_features/.github/workflows/amd64_linux.yml create mode 100644 cpu_features/.github/workflows/amd64_macos.yml create mode 100644 cpu_features/.github/workflows/amd64_windows.yml create mode 100644 cpu_features/.github/workflows/arm_linux.yml create mode 100644 cpu_features/.github/workflows/clang_format.yml create mode 100644 cpu_features/.github/workflows/mips_linux.yml create mode 100644 cpu_features/.gitignore create mode 100644 cpu_features/CMakeLists.txt create mode 100644 cpu_features/CONTRIBUTING.md create mode 100644 cpu_features/LICENSE create mode 100644 cpu_features/README.md create mode 100644 cpu_features/ci/Makefile create mode 100644 cpu_features/ci/README.md create mode 100644 cpu_features/ci/doc/docker.dot create mode 100644 cpu_features/ci/doc/docker.svg create mode 100755 cpu_features/ci/doc/generate_image.sh create mode 100644 cpu_features/ci/docker/amd64/Dockerfile create mode 100644 cpu_features/ci/docker/toolchain/Dockerfile create mode 100644 cpu_features/ci/sample/CMakeLists.txt create mode 100644 cpu_features/ci/sample/main.cpp create mode 100644 cpu_features/ci/vagrant/freebsd/Vagrantfile create mode 100644 cpu_features/cmake/CpuFeaturesConfig.cmake.in create mode 100644 cpu_features/cmake/CpuFeaturesNdkCompatConfig.cmake.in create mode 100644 cpu_features/cmake/README.md create mode 100644 cpu_features/cmake/googletest.CMakeLists.txt.in create mode 100644 cpu_features/include/cpu_features_cache_info.h create mode 100644 cpu_features/include/cpu_features_macros.h create mode 100644 cpu_features/include/cpuinfo_aarch64.h create mode 100644 cpu_features/include/cpuinfo_arm.h create mode 100644 cpu_features/include/cpuinfo_mips.h create mode 100644 cpu_features/include/cpuinfo_ppc.h create mode 100644 cpu_features/include/cpuinfo_x86.h create mode 100644 cpu_features/include/internal/bit_utils.h create mode 100644 cpu_features/include/internal/cpuid_x86.h create mode 100644 cpu_features/include/internal/filesystem.h create mode 100644 cpu_features/include/internal/hwcaps.h create mode 100644 cpu_features/include/internal/stack_line_reader.h create mode 100644 cpu_features/include/internal/string_view.h create mode 100644 cpu_features/ndk_compat/CMakeLists.txt create mode 100644 cpu_features/ndk_compat/README.md create mode 100644 cpu_features/ndk_compat/cpu-features.c create mode 100644 cpu_features/ndk_compat/cpu-features.h create mode 100644 cpu_features/ndk_compat/ndk-compat-test.c create mode 100755 cpu_features/scripts/make_release.sh create mode 100755 cpu_features/scripts/run_integration.sh create mode 100755 cpu_features/scripts/test_integration.sh create mode 100644 cpu_features/src/copy.inl create mode 100644 cpu_features/src/define_introspection.inl create mode 100644 cpu_features/src/define_introspection_and_hwcaps.inl create mode 100644 cpu_features/src/equals.inl create mode 100644 cpu_features/src/filesystem.c create mode 100644 cpu_features/src/hwcaps.c create mode 100644 cpu_features/src/impl_aarch64_linux_or_android.c create mode 100644 cpu_features/src/impl_arm_linux_or_android.c create mode 100644 cpu_features/src/impl_mips_linux_or_android.c create mode 100644 cpu_features/src/impl_ppc_linux.c create mode 100644 cpu_features/src/impl_x86__base_implementation.inl create mode 100644 cpu_features/src/impl_x86_freebsd.c create mode 100644 cpu_features/src/impl_x86_linux_or_android.c create mode 100644 cpu_features/src/impl_x86_macos.c create mode 100644 cpu_features/src/impl_x86_windows.c create mode 100644 cpu_features/src/stack_line_reader.c create mode 100644 cpu_features/src/string_view.c create mode 100644 cpu_features/src/utils/list_cpu_features.c create mode 100644 cpu_features/test/CMakeLists.txt create mode 100644 cpu_features/test/bit_utils_test.cc create mode 100644 cpu_features/test/cpuinfo_aarch64_test.cc create mode 100644 cpu_features/test/cpuinfo_arm_test.cc create mode 100644 cpu_features/test/cpuinfo_mips_test.cc create mode 100644 cpu_features/test/cpuinfo_ppc_test.cc create mode 100644 cpu_features/test/cpuinfo_x86_test.cc create mode 100644 cpu_features/test/filesystem_for_testing.cc create mode 100644 cpu_features/test/filesystem_for_testing.h create mode 100644 cpu_features/test/hwcaps_for_testing.cc create mode 100644 cpu_features/test/hwcaps_for_testing.h create mode 100644 cpu_features/test/stack_line_reader_test.cc create mode 100644 cpu_features/test/string_view_test.cc create mode 100644 docs/AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md create mode 100644 docs/CHANGELOG.md create mode 100644 docs/CMakeLists.txt create mode 100644 docs/CODE_OF_CONDUCT.md create mode 100644 docs/CONTRIBUTING.md create mode 100644 docs/DCO.txt create mode 100644 docs/Doxyfile.in create mode 100644 docs/DoxygenLayout.xml create mode 100644 docs/extending_volk.dox create mode 100644 docs/kernels.dox create mode 100644 docs/main_page.dox create mode 100644 docs/terms_and_techniques.dox create mode 100644 docs/using_volk.dox create mode 100644 docs/versioning.md create mode 100644 docs/volk_logo.png create mode 100644 docs/volk_logo_small.png create mode 100644 gen/archs.xml create mode 100644 gen/machines.xml create mode 100644 gen/volk_arch_defs.py create mode 100644 gen/volk_compile_utils.py create mode 100644 gen/volk_kernel_defs.py create mode 100644 gen/volk_machine_defs.py create mode 100644 gen/volk_tmpl_utils.py create mode 100644 include/volk/constants.h create mode 100644 include/volk/saturation_arithmetic.h create mode 100644 include/volk/sse2neon.h create mode 100644 include/volk/volk_alloc.hh create mode 100644 include/volk/volk_avx2_intrinsics.h create mode 100644 include/volk/volk_avx_intrinsics.h create mode 100644 include/volk/volk_common.h create mode 100644 include/volk/volk_complex.h create mode 100644 include/volk/volk_malloc.h create mode 100644 include/volk/volk_neon_intrinsics.h create mode 100644 include/volk/volk_prefs.h create mode 100644 include/volk/volk_sse3_intrinsics.h create mode 100644 include/volk/volk_sse_intrinsics.h create mode 100644 include/volk/volk_version.h.in create mode 100644 kernels/README.md create mode 100644 kernels/volk/asm/neon/volk_16i_max_star_horizontal_16i.s create mode 100644 kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_a_neonasm.s create mode 100644 kernels/volk/asm/neon/volk_32f_x2_add_32f_a_neonasm.s create mode 100644 kernels/volk/asm/neon/volk_32f_x2_add_32f_a_neonpipeline.s create mode 100644 kernels/volk/asm/neon/volk_32f_x2_dot_prod_32f_a_neonasm.s create mode 100644 kernels/volk/asm/neon/volk_32f_x2_dot_prod_32f_a_neonasm_opts.s create mode 100644 kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasm.s create mode 100644 kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasmvmla.s create mode 100644 kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonpipeline.s create mode 100644 kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_unrollasm.s create mode 100644 kernels/volk/asm/neon/volk_32fc_x2_dot_prod_32fc_a_neonasm.s create mode 100644 kernels/volk/asm/neon/volk_32fc_x2_dot_prod_32fc_a_neonasm_opttests.s create mode 100644 kernels/volk/asm/neon/volk_32fc_x2_multiply_32fc_a_neonasm.s create mode 100644 kernels/volk/asm/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_16ic_magnitude_16i_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_16u_byteswap_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_s32f_add_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_s32f_multiply_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_s32f_normalize_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_sqrt_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_x2_add_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_x2_divide_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_x2_max_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_x2_min_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32fc_magnitude_32f_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32i_x2_and_32i_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_32i_x2_or_32i_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_8i_convert_16i_a_orc_impl.orc create mode 100644 kernels/volk/asm/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc create mode 100644 kernels/volk/volk_16i_32fc_dot_prod_32fc.h create mode 100644 kernels/volk/volk_16i_branch_4_state_8.h create mode 100644 kernels/volk/volk_16i_convert_8i.h create mode 100644 kernels/volk/volk_16i_max_star_16i.h create mode 100644 kernels/volk/volk_16i_max_star_horizontal_16i.h create mode 100644 kernels/volk/volk_16i_permute_and_scalar_add.h create mode 100644 kernels/volk/volk_16i_s32f_convert_32f.h create mode 100644 kernels/volk/volk_16i_x4_quad_max_star_16i.h create mode 100644 kernels/volk/volk_16i_x5_add_quad_16i_x4.h create mode 100644 kernels/volk/volk_16ic_convert_32fc.h create mode 100644 kernels/volk/volk_16ic_deinterleave_16i_x2.h create mode 100644 kernels/volk/volk_16ic_deinterleave_real_16i.h create mode 100644 kernels/volk/volk_16ic_deinterleave_real_8i.h create mode 100644 kernels/volk/volk_16ic_magnitude_16i.h create mode 100644 kernels/volk/volk_16ic_s32f_deinterleave_32f_x2.h create mode 100644 kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h create mode 100644 kernels/volk/volk_16ic_s32f_magnitude_32f.h create mode 100644 kernels/volk/volk_16ic_x2_dot_prod_16ic.h create mode 100644 kernels/volk/volk_16ic_x2_multiply_16ic.h create mode 100644 kernels/volk/volk_16u_byteswap.h create mode 100644 kernels/volk/volk_16u_byteswappuppet_16u.h create mode 100644 kernels/volk/volk_32f_64f_add_64f.h create mode 100644 kernels/volk/volk_32f_64f_multiply_64f.h create mode 100644 kernels/volk/volk_32f_8u_polarbutterfly_32f.h create mode 100644 kernels/volk/volk_32f_8u_polarbutterflypuppet_32f.h create mode 100644 kernels/volk/volk_32f_accumulator_s32f.h create mode 100644 kernels/volk/volk_32f_acos_32f.h create mode 100644 kernels/volk/volk_32f_asin_32f.h create mode 100644 kernels/volk/volk_32f_atan_32f.h create mode 100644 kernels/volk/volk_32f_binary_slicer_32i.h create mode 100644 kernels/volk/volk_32f_binary_slicer_8i.h create mode 100644 kernels/volk/volk_32f_convert_64f.h create mode 100644 kernels/volk/volk_32f_cos_32f.h create mode 100644 kernels/volk/volk_32f_exp_32f.h create mode 100644 kernels/volk/volk_32f_expfast_32f.h create mode 100644 kernels/volk/volk_32f_index_max_16u.h create mode 100644 kernels/volk/volk_32f_index_max_32u.h create mode 100644 kernels/volk/volk_32f_index_min_16u.h create mode 100644 kernels/volk/volk_32f_index_min_32u.h create mode 100644 kernels/volk/volk_32f_invsqrt_32f.h create mode 100644 kernels/volk/volk_32f_log2_32f.h create mode 100644 kernels/volk/volk_32f_null_32f.h create mode 100644 kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h create mode 100644 kernels/volk/volk_32f_s32f_add_32f.h create mode 100644 kernels/volk/volk_32f_s32f_calc_spectral_noise_floor_32f.h create mode 100644 kernels/volk/volk_32f_s32f_convert_16i.h create mode 100644 kernels/volk/volk_32f_s32f_convert_32i.h create mode 100644 kernels/volk/volk_32f_s32f_convert_8i.h create mode 100644 kernels/volk/volk_32f_s32f_mod_rangepuppet_32f.h create mode 100644 kernels/volk/volk_32f_s32f_multiply_32f.h create mode 100644 kernels/volk/volk_32f_s32f_normalize.h create mode 100644 kernels/volk/volk_32f_s32f_power_32f.h create mode 100644 kernels/volk/volk_32f_s32f_s32f_mod_range_32f.h create mode 100644 kernels/volk/volk_32f_s32f_stddev_32f.h create mode 100644 kernels/volk/volk_32f_sin_32f.h create mode 100644 kernels/volk/volk_32f_sqrt_32f.h create mode 100644 kernels/volk/volk_32f_stddev_and_mean_32f_x2.h create mode 100644 kernels/volk/volk_32f_tan_32f.h create mode 100644 kernels/volk/volk_32f_tanh_32f.h create mode 100644 kernels/volk/volk_32f_x2_add_32f.h create mode 100644 kernels/volk/volk_32f_x2_divide_32f.h create mode 100644 kernels/volk/volk_32f_x2_dot_prod_16i.h create mode 100644 kernels/volk/volk_32f_x2_dot_prod_32f.h create mode 100644 kernels/volk/volk_32f_x2_fm_detectpuppet_32f.h create mode 100644 kernels/volk/volk_32f_x2_interleave_32fc.h create mode 100644 kernels/volk/volk_32f_x2_max_32f.h create mode 100644 kernels/volk/volk_32f_x2_min_32f.h create mode 100644 kernels/volk/volk_32f_x2_multiply_32f.h create mode 100644 kernels/volk/volk_32f_x2_pow_32f.h create mode 100644 kernels/volk/volk_32f_x2_s32f_interleave_16ic.h create mode 100644 kernels/volk/volk_32f_x2_subtract_32f.h create mode 100644 kernels/volk/volk_32f_x3_sum_of_poly_32f.h create mode 100644 kernels/volk/volk_32fc_32f_add_32fc.h create mode 100644 kernels/volk/volk_32fc_32f_dot_prod_32fc.h create mode 100644 kernels/volk/volk_32fc_32f_multiply_32fc.h create mode 100644 kernels/volk/volk_32fc_accumulator_s32fc.h create mode 100644 kernels/volk/volk_32fc_conjugate_32fc.h create mode 100644 kernels/volk/volk_32fc_convert_16ic.h create mode 100644 kernels/volk/volk_32fc_deinterleave_32f_x2.h create mode 100644 kernels/volk/volk_32fc_deinterleave_64f_x2.h create mode 100644 kernels/volk/volk_32fc_deinterleave_imag_32f.h create mode 100644 kernels/volk/volk_32fc_deinterleave_real_32f.h create mode 100644 kernels/volk/volk_32fc_deinterleave_real_64f.h create mode 100644 kernels/volk/volk_32fc_index_max_16u.h create mode 100644 kernels/volk/volk_32fc_index_max_32u.h create mode 100644 kernels/volk/volk_32fc_index_min_16u.h create mode 100644 kernels/volk/volk_32fc_index_min_32u.h create mode 100644 kernels/volk/volk_32fc_magnitude_32f.h create mode 100644 kernels/volk/volk_32fc_magnitude_squared_32f.h create mode 100644 kernels/volk/volk_32fc_s32f_atan2_32f.h create mode 100644 kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h create mode 100644 kernels/volk/volk_32fc_s32f_magnitude_16i.h create mode 100644 kernels/volk/volk_32fc_s32f_power_32fc.h create mode 100644 kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h create mode 100644 kernels/volk/volk_32fc_s32f_power_spectrum_32f.h create mode 100644 kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h create mode 100644 kernels/volk/volk_32fc_s32fc_multiply_32fc.h create mode 100644 kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h create mode 100644 kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h create mode 100644 kernels/volk/volk_32fc_x2_add_32fc.h create mode 100644 kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h create mode 100644 kernels/volk/volk_32fc_x2_divide_32fc.h create mode 100644 kernels/volk/volk_32fc_x2_dot_prod_32fc.h create mode 100644 kernels/volk/volk_32fc_x2_multiply_32fc.h create mode 100644 kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h create mode 100644 kernels/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h create mode 100644 kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h create mode 100644 kernels/volk/volk_32fc_x2_square_dist_32f.h create mode 100644 kernels/volk/volk_32i_s32f_convert_32f.h create mode 100644 kernels/volk/volk_32i_x2_and_32i.h create mode 100644 kernels/volk/volk_32i_x2_or_32i.h create mode 100644 kernels/volk/volk_32u_byteswap.h create mode 100644 kernels/volk/volk_32u_byteswappuppet_32u.h create mode 100644 kernels/volk/volk_32u_popcnt.h create mode 100644 kernels/volk/volk_32u_popcntpuppet_32u.h create mode 100644 kernels/volk/volk_32u_reverse_32u.h create mode 100644 kernels/volk/volk_64f_convert_32f.h create mode 100644 kernels/volk/volk_64f_x2_add_64f.h create mode 100644 kernels/volk/volk_64f_x2_max_64f.h create mode 100644 kernels/volk/volk_64f_x2_min_64f.h create mode 100644 kernels/volk/volk_64f_x2_multiply_64f.h create mode 100644 kernels/volk/volk_64u_byteswap.h create mode 100644 kernels/volk/volk_64u_byteswappuppet_64u.h create mode 100644 kernels/volk/volk_64u_popcnt.h create mode 100644 kernels/volk/volk_64u_popcntpuppet_64u.h create mode 100644 kernels/volk/volk_8i_convert_16i.h create mode 100644 kernels/volk/volk_8i_s32f_convert_32f.h create mode 100644 kernels/volk/volk_8ic_deinterleave_16i_x2.h create mode 100644 kernels/volk/volk_8ic_deinterleave_real_16i.h create mode 100644 kernels/volk/volk_8ic_deinterleave_real_8i.h create mode 100644 kernels/volk/volk_8ic_s32f_deinterleave_32f_x2.h create mode 100644 kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h create mode 100644 kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h create mode 100644 kernels/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc.h create mode 100644 kernels/volk/volk_8u_conv_k7_r2puppet_8u.h create mode 100644 kernels/volk/volk_8u_x2_encodeframepolar_8u.h create mode 100644 kernels/volk/volk_8u_x3_encodepolar_8u_x2.h create mode 100644 kernels/volk/volk_8u_x3_encodepolarpuppet_8u.h create mode 100644 kernels/volk/volk_8u_x4_conv_k7_r2_8u.h create mode 100644 lib/CMakeLists.txt create mode 100644 lib/constants.c.in create mode 100644 lib/kernel_tests.h create mode 100644 lib/qa_utils.cc create mode 100644 lib/qa_utils.h create mode 100644 lib/testqa.cc create mode 100644 lib/volk_malloc.c create mode 100644 lib/volk_prefs.c create mode 100644 lib/volk_rank_archs.c create mode 100644 lib/volk_rank_archs.h create mode 100644 python/volk_modtool/CMakeLists.txt create mode 100644 python/volk_modtool/README create mode 100644 python/volk_modtool/__init__.py create mode 100644 python/volk_modtool/cfg.py create mode 100755 python/volk_modtool/volk_modtool create mode 100644 python/volk_modtool/volk_modtool_generate.py create mode 100755 scripts/ci/download_intel_sde.sh create mode 100755 scripts/licensing/count_contrib.sh create mode 100755 scripts/tools/release.sh create mode 100644 scripts/tools/run_citations_update.py create mode 100755 scripts/tools/update_citations.sh create mode 100644 tmpl/volk.pc.in create mode 100644 tmpl/volk.tmpl.c create mode 100644 tmpl/volk.tmpl.h create mode 100644 tmpl/volk_config_fixed.tmpl.h create mode 100644 tmpl/volk_cpu.tmpl.c create mode 100644 tmpl/volk_cpu.tmpl.h create mode 100644 tmpl/volk_machine_xxx.tmpl.c create mode 100644 tmpl/volk_machines.tmpl.c create mode 100644 tmpl/volk_machines.tmpl.h create mode 100644 tmpl/volk_typedefs.tmpl.h diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..285b68d --- /dev/null +++ b/.clang-format @@ -0,0 +1,106 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false +BinPackParameters: false +BreakBeforeBraces: Custom +BraceWrapping: + AfterClass: true + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 90 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeCategories: + - Regex: '^"(gnuradio)/' + Priority: 1 + - Regex: '^<(gnuradio)/' + Priority: 2 + - Regex: '^<(boost)/' + Priority: 98 + - Regex: '^<[a-z]*>$' + Priority: 99 + - Regex: '^".*"$' + Priority: 0 + - Regex: '.*' + Priority: 10 + +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never + + diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml new file mode 100644 index 0000000..af5daaa --- /dev/null +++ b/.github/workflows/android_build.yml @@ -0,0 +1,49 @@ +name: Build on Android NDK + +on: [push, pull_request] + +jobs: + build: + name: Build on Android NDK ${{ matrix.arch.name }} + + strategy: + fail-fast: false + matrix: + arch: + - { name: armeabi-v7a, allow_fail: false } + - { name: arm64-v8a, allow_fail: false } + - { name: x86, allow_fail: false } + - { name: x86_64, allow_fail: false } + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3.0.0 + with: + submodules: 'recursive' + + - name: Update repositories + run: sudo apt update + + # All dependencies + - name: Install dependencies + run: sudo apt install -y cmake openjdk-11-jre-headless wget unzip make python3-mako + + # Setup Android SDK, and auto-accept licenses + - name: Install Android SDK + run: wget --quiet --output-document=android-sdk.zip https://dl.google.com/android/repository/commandlinetools-linux-8512546_latest.zip && mkdir android-sdk-linux && unzip -qq android-sdk.zip -d android-sdk-linux && export ANDROID_HOME=./android-sdk-linux && echo y | $ANDROID_HOME/cmdline-tools/bin/sdkmanager --sdk_root=android-sdk-linux --update && (echo y; echo y; echo y; echo y; echo y; echo y; echo y; echo y) | $ANDROID_HOME/cmdline-tools/bin/sdkmanager --sdk_root=android-sdk-linux --licenses + + # Call SDKManager to install the Android NDK + - name: Install Android NDK + run: $GITHUB_WORKSPACE/android-sdk-linux/cmdline-tools/bin/sdkmanager --sdk_root=$GITHUB_WORKSPACE/android-sdk-linux --install "ndk;24.0.8215888" --channel=3 + + # Setup build directory + - name: Setup ${{ matrix.arch.name }} + shell: bash + run: cd $GITHUB_WORKSPACE/ && mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/android-sdk-linux/ndk/24.0.8215888/build/cmake/android.toolchain.cmake -DANDROID_ABI=${{ matrix.arch.name }} -DANDROID_PLATFORM=android-23 .. + + # Build + - name: Build ${{ matrix.arch.name }} + shell: bash + run: cd $GITHUB_WORKSPACE/build && make + continue-on-error: ${{ matrix.arch.allow_fail }} diff --git a/.github/workflows/check-pr-formatting.yml b/.github/workflows/check-pr-formatting.yml new file mode 100644 index 0000000..35c6327 --- /dev/null +++ b/.github/workflows/check-pr-formatting.yml @@ -0,0 +1,23 @@ +name: Check PR Formatting + +on: + push: + paths-ignore: + - 'tmpl/' + - 'include/volk/sse2neon.h' + pull_request: + paths-ignore: + - 'tmpl/' + - 'include/volk/sse2neon.h' + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: gnuradio/clang-format-lint-action@v0.5-4 + with: + source: '.' + exclude: './tmpl,./include/volk/sse2neon.h' + extensions: 'c,cc,cpp,cxx,h,hh' \ No newline at end of file diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml new file mode 100644 index 0000000..813bd45 --- /dev/null +++ b/.github/workflows/run-tests.yml @@ -0,0 +1,245 @@ +name: Run VOLK tests + +on: [push, pull_request] + +jobs: + build-ubuntu: + name: Build on ubuntu-latest ${{ matrix.compiler.name }} + + strategy: + fail-fast: false + matrix: + compiler: + - { name: g++-7, cc: gcc-7, cxx: g++-7 } + - { name: g++-8, cc: gcc-8, cxx: g++-8 } + - { name: g++-9, cc: gcc-9, cxx: g++-9 } + - { name: g++-10, cc: gcc-10, cxx: g++-10 } + - { name: clang-7, cc: clang-7, cxx: clang++-7 } + - { name: clang-8, cc: clang-8, cxx: clang++-8 } + - { name: clang-9, cc: clang-9, cxx: clang++-9 } + - { name: clang-10, cc: clang-10, cxx: clang++-10 } + # - { name: clang-11, cc: clang-11, cxx: clang++-11 } + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3.0.0 + with: + submodules: 'recursive' + - name: Install dependencies + run: sudo apt install python3-mako liborc-dev ${{ matrix.compiler.name }} + - name: Configure + env: + CC: ${{ matrix.compiler.cc }} + CXX: ${{ matrix.compiler.cxx }} + run: mkdir build && cd build && cmake -DCMAKE_CXX_FLAGS="-Werror" .. + - name: Build + run: cmake --build build + - name: Print info + run: | + ./build/cpu_features/list_cpu_features + ./build/apps/volk-config-info --alignment + ./build/apps/volk-config-info --avail-machines + ./build/apps/volk-config-info --all-machines + ./build/apps/volk-config-info --malloc + ./build/apps/volk-config-info --cc + - name: Test + run: | + cd build + ctest -V + + build-ubuntu-arm: + # The host should always be linux + # see: https://github.com/uraimo/run-on-arch-action + runs-on: ubuntu-latest + name: Build on ${{ matrix.distro }} ${{ matrix.arch }} ${{ matrix.compiler.name }} + + # Run steps on a matrix of compilers and possibly archs. + strategy: + fail-fast: false + matrix: + include: + - arch: aarch64 + distro: ubuntu20.04 + compiler: { name: g++-8, cc: gcc-8, cxx: g++-8 } + - arch: aarch64 + distro: ubuntu20.04 + compiler: { name: g++-9, cc: gcc-9, cxx: g++-9 } + - arch: aarch64 + distro: ubuntu20.04 + compiler: { name: g++-10, cc: gcc-10, cxx: g++-10 } + # This test failed with a stl filesystem issue. + # - arch: aarch64 + # distro: ubuntu20.04 + # compiler: { name: clang-8, cc: clang-8, cxx: clang++-8 } + - arch: aarch64 + distro: ubuntu20.04 + compiler: { name: clang-9, cc: clang-9, cxx: clang++-9 } + - arch: aarch64 + distro: ubuntu20.04 + compiler: { name: clang-10, cc: clang-10, cxx: clang++-10 } + # CMake fails to detect compiler on armv7 ... + # - arch: armv7 + # distro: ubuntu20.04 + # compiler: { name: g++-10, cc: gcc-10, cxx: g++-10 } + # - arch: armv7 + # distro: ubuntu20.04 + # compiler: { name: clang-10, cc: clang-10, cxx: clang++-10 } + # - arch: ppc64le + # distro: ubuntu20.04 + # - arch: s390x + # distro: ubuntu20.04 + + steps: + - uses: actions/checkout@v3.0.0 + with: + submodules: 'recursive' + - uses: uraimo/run-on-arch-action@v2.0.5 + name: Build in non-x86 container + id: build + with: + arch: ${{ matrix.arch }} + distro: ${{ matrix.distro }} + + # Not required, but speeds up builds + githubToken: ${{ github.token }} + + setup: | + mkdir -p "${PWD}/build" + + dockerRunArgs: | + --volume "${PWD}:/volk" + + env: | # YAML, but pipe character is necessary + CC: ${{ matrix.compiler.cc }} + CXX: ${{ matrix.compiler.cxx }} + + shell: /bin/sh + + install: | + case "${{ matrix.distro }}" in + ubuntu*|jessie|stretch|buster) + apt-get update -q -y + apt-get install -q -y git cmake python3-mako liborc-dev ${{ matrix.compiler.name }} + ;; + fedora*) + dnf -y update + dnf -y install git which + ;; + esac + + run: | + cd /volk + cd build + cmake -DCMAKE_CXX_FLAGS="-Werror" .. + make -j$(nproc) + ./cpu_features/list_cpu_features + ./apps/volk-config-info --alignment + ./apps/volk-config-info --avail-machines + ./apps/volk-config-info --all-machines + ./apps/volk-config-info --malloc + ./apps/volk-config-info --cc + ctest -V + + + build-ubuntu-static: + name: Build static on ubuntu-latest + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3.0.0 + with: + submodules: 'recursive' + - name: dependencies + run: sudo apt install python3-mako liborc-dev + - name: configure + run: mkdir build && cd build && cmake -DENABLE_STATIC_LIBS=True .. + - name: build + run: cmake --build build + - name: Print info + run: | + ./build/cpu_features/list_cpu_features + ./build/apps/volk-config-info --alignment + ./build/apps/volk-config-info --avail-machines + ./build/apps/volk-config-info --all-machines + ./build/apps/volk-config-info --malloc + ./build/apps/volk-config-info --cc + - name: test + run: cd build && ctest -V + + build-windows: + + runs-on: windows-latest + + steps: + - uses: actions/checkout@v3.0.0 + with: + submodules: 'recursive' + - name: dependencies + run: pip install mako + - name: configure + run: mkdir build && cd build && cmake .. + - name: build + run: cmake --build build --config Release --target INSTALL + - name: test + run: cd build && ctest -V -C Release + + # build-windows-msys2: + # name: Build on windows-latest using MinGW and MSYS2 + + # runs-on: windows-latest + # steps: + # - uses: msys2/setup-msys2@v2 + # with: + # update: true + # install: >- + # base-devel + # git + # mingw-w64-x86_64-gcc-libs + # mingw-w64-x86_64-orc + # python + # python-mako + # python-six + # mingw-w64-x86_64-gcc + # mingw-w64-x86_64-cmake + # - uses: actions/checkout@v2 + # - name: Checkout submodules + # run: git submodule update --init --recursive + # - name: Configure + # shell: msys2 {0} + # run: mkdir build && cd build && cmake -G "MSYS Makefiles" -DCMAKE_BUILD_TYPE=Release -DPYTHON_EXECUTABLE=/usr/bin/python3 -DCMAKE_CXX_FLAGS="-Werror" .. + # - name: Build + # shell: msys2 {0} + # run: cd build && make -j$(nproc) + # - name: Test + # shell: msys2 {0} + # run: | + # cd build + # ctest -V + + build-macos: + + runs-on: macos-latest + + steps: + - uses: actions/checkout@v3.0.0 + with: + submodules: 'recursive' + - uses: actions/checkout@v1 + - name: dependencies + run: pip3 install mako + - name: configure + run: mkdir build && cd build && cmake .. + - name: build + run: cmake --build build --config Debug + - name: Print info + run: | + ./build/cpu_features/list_cpu_features + # ./build/apps/volk-config-info --alignment + # ./build/apps/volk-config-info --avail-machines + # ./build/apps/volk-config-info --all-machines + # ./build/apps/volk-config-info --malloc + # ./build/apps/volk-config-info --cc + - name: test + run: cd build && ctest -V diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..92611b5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*~ +*.pyc +*.pyo +*build*/ +archives/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..fbeea52 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,30 @@ +# This file is a template, and might need editing before it works on your project. +# use the official gcc image, based on debian +# can use versions as well, like gcc:5.2 +# see https://hub.docker.com/_/gcc/ +image: ubuntu:18.04 + +build: + stage: build + # instead of calling g++ directly you can also use some build toolkit like make + # install the necessary build tools when needed + before_script: + - apt update && apt -y install make cmake python python-pip && pip install mako + script: + - mkdir build && cd build && cmake .. && make -j + artifacts: + paths: + - build/ + # depending on your build setup it's most likely a good idea to cache outputs to reduce the build time + # cache: + # paths: + # - "*.o" + +# run tests using the binary built before +test: + stage: test + before_script: + - apt update && apt -y install cmake python python-pip && pip install mako + script: + - cd build && ctest -V + diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..c94aa2a --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "cpu_features"] + path = cpu_features + url = https://github.com/google/cpu_features.git diff --git a/.lastrelease b/.lastrelease new file mode 100644 index 0000000..f6dcb64 --- /dev/null +++ b/.lastrelease @@ -0,0 +1 @@ +v2.5.2 diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..a08d235 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,67 @@ +language: cpp + +os: linux +dist: bionic + +addons: + apt: + packages: &common_packages + - python3-mako + - liborc-dev + +env: + global: + - SDE_VERSION=sde-external-8.50.0-2020-03-26-lin + - SDE_URL=http://software.intel.com/content/dam/develop/external/us/en/protected/ + +matrix: + include: + # Job 1 ... gcc-7 with Intel SDE + - name: Linux x86 Intel SDE GCC 7 + env: MATRIX_EVAL="CC=gcc-7 && CXX=g++-7 CMAKE_ARG=-DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/intel-sde.cmake" + addons: {apt: {sources: "ubuntu-toolchain-r-test", packages: [*common_packages]}} + cache: + directories: + - ${TRAVIS_BUILD_DIR}/cache + before_script: + - cd ${TRAVIS_BUILD_DIR} && ./scripts/ci/download_intel_sde.sh + + # Job 4 ... gcc-6 + - name: Linux x86 GCC 6 + env: MATRIX_EVAL="CC=gcc-6 && CXX=g++-6" + addons: {apt: {sources: "ubuntu-toolchain-r-test", packages: [*common_packages, g++-6]}} + + # Job 7 ... ARMv7 cross compile + - name: Linux ARMv7 Qemu GCC 7 + env: MATRIX_EVAL="CMAKE_ARG=-DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/arm-linux-gnueabihf.cmake" + addons: {apt: {sources: "ubuntu-toolchain-r-test", packages: [*common_packages, g++-arm-linux-gnueabihf, qemu-user]}} + + # Job 8 ... ARMv8 (aarch64) cross compile + - name: Linux ARMv8 (aarch64) Qemu GCC 7 + env: MATRIX_EVAL="CMAKE_ARG=-DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/aarch64-linux-gnu.cmake" + addons: {apt: {sources: "ubuntu-toolchain-r-test", packages: [*common_packages, g++-aarch64-linux-gnu, qemu-user]}} + + # Job 9 ... clang + - name: Linux x86 Clang 6 + env: MATRIX_EVAL="CC=\"clang -fprofile-instr-generate -fcoverage-mapping\" && CXX=\"clang++ -fprofile-instr-generate -fcoverage-mapping\"" + addons: {apt: {packages: [*common_packages, ]}} + + - name: Linux ARMv8 (aarch64) GCC 7 + arch: arm64 + env: MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" + addons: {apt: {packages: [*common_packages, ]}} + + - name: Linux ARMv8 (aarch64) Clang 6 + arch: arm64 + env: MATRIX_EVAL="CC=clang && CXX=clang++" + addons: {apt: {packages: [*common_packages, ]}} + +script: + - eval "${MATRIX_EVAL}" + - lscpu + - git submodule update --init --recursive + - mkdir build && cd build + - cmake ${CMAKE_ARG} ../ + - make + - echo $(./apps/volk-config-info --malloc) && echo $(./apps/volk-config-info --alignment) && echo "All compiled VOLK machines:" $(./apps/volk-config-info --all-machines) && echo "Available VOLK machines:" $(./apps/volk-config-info --avail-machines) + - ctest -V diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 0000000..f787581 --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,261 @@ +{ + "title": "Vector-Optimized Library of Kernels (VOLK)", + "description": "VOLK is the Vector-Optimized Library of Kernels. It is a free library that contains kernels of hand-written SIMD code for different mathematical operations. Since each SIMD architecture can be very different and no compiler has yet come along to handle vectorization properly or highly efficiently, VOLK approaches the problem differently. For each architecture or platform that a developer wishes to vectorize for, a new proto-kernel is added to VOLK. At runtime, VOLK will select the correct proto-kernel. In this way, the users of VOLK call a kernel for performing the operation that is platform/architecture agnostic. This allows us to write portable SIMD code that is optimized for a variety of platforms.", + "license": { + "id": "GPL-3.0-or-later" + }, + "keywords": [ + "communication", + "software radio", + "SDR", + "C", + "C++", + "SIMD", + "library" + ], + "creators": [ + { + "affiliation": "Department of Communications Engineering, University of Bremen, Germany", + "name": "Demel, Johannes", + "orcid": "0000-0002-5434-7232" + }, + { + "name": "Dickens, Michael" + }, + { + "name": "Anderson, Douglas" + }, + { + "name": "Ashton, Brennan" + }, + { + "name": "Balister, Philip" + }, + { + "name": "Behar, Doron" + }, + { + "name": "Behnke, Steven" + }, + { + "name": "Bekhit, Amr" + }, + { + "affiliation": "Carnegie Mellon University, IIT Bombay", + "name": "Bhowmick, Abhishek" + }, + { + "name": "Blossom, Eric" + }, + { + "name": "Blum, Josh" + }, + { + "name": "Bottoms, A. Maitland" + }, + { + "name": "Briggs, Elliot" + }, + { + "name": "Cardoso, Jeison" + }, + { + "name": "Cercueil, Paul" + }, + { + "affiliation": "Corgan Labs", + "name": "Corgan, Johnathan" + }, + { + "affiliation": "Skylark Wireless (@skylarkwireless)", + "name": "Corgan, Nicholas" + }, + { + "name": "Cruz, Luigi" + }, + { + "name": "Economos, Ron" + }, + { + "name": "Enochs, Brandon P." + }, + { + "affiliation": "Centre Tecnol\u00f2gic de Telecomunicacions de Catalunya (CTTC)", + "name": "Fernandez, Carles" + }, + { + "name": "Fischer, Moritz" + }, + { + "name": "Foster, Nick" + }, + { + "name": "Geiger, Douglas" + }, + { + "name": "Giard, Pascal" + }, + { + "name": "Goavec-Merou, Gwenhael" + }, + { + "name": "Hilburn, Ben" + }, + { + "name": "Holguin, Albert" + }, + { + "name": "Iwamoto, Jessica" + }, + { + "name": "Kaesberger, Martin" + }, + { + "name": "Lichtman, Marc" + }, + { + "name": "Logue, Kyle A" + }, + { + "name": "Lundmark, Magnus" + }, + { + "name": "Markgraf, Steve" + }, + { + "name": "Mayer, Christoph" + }, + { + "name": "McCarthy, Nicholas" + }, + { + "name": "McCarthy, Nick" + }, + { + "affiliation": "University of Colorado Boulder", + "name": "Miralles, Damian" + }, + { + "name": "Munaut, Sylvain" + }, + { + "name": "M\u00fcller, Marcus" + }, + { + "affiliation": "GCN Development", + "name": "Nieboer, Geof" + }, + { + "affiliation": "@deepsig", + "name": "O'Shea, Tim" + }, + { + "name": "Olivain, Julien" + }, + { + "name": "Oltmanns, Stefan" + }, + { + "name": "Pinkava, Jiri" + }, + { + "name": "Piscopo, Mike" + }, + { + "name": "Quiceno, Jam M. Hernandez" + }, + { + "name": "Rene, Mathieu" + }, + { + "name": "Ritterhoff, Florian" + }, + { + "name": "Robertson, Dan" + }, + { + "name": "Rocca, Federico 'Larroca' La" + }, + { + "name": "Rode, Andrej" + }, + { + "name": "Rodionov, Andrey" + }, + { + "affiliation": "GNU Radio", + "name": "Rondeau, Tom" + }, + { + "name": "Sekine, Takehiro" + }, + { + "name": "Semich, Karl" + }, + { + "name": "Sergeev, Vanya" + }, + { + "name": "Slokva, Alexey" + }, + { + "name": "Smith, Clayton" + }, + { + "affiliation": "Medurit AB", + "name": "Stigo, Albin" + }, + { + "name": "Thompson, Adam" + }, + { + "name": "Thompson, Roy" + }, + { + "name": "Velichkov, Vasil" + }, + { + "affiliation": "@MITHaystack", + "name": "Volz, Ryan" + }, + { + "name": "Walls, Andy" + }, + { + "name": "Ward, Don" + }, + { + "name": "West, Nathan" + }, + { + "name": "Wiedemann, Bernhard M." + }, + { + "name": "Wunsch, Stefan" + }, + { + "name": "Zapodovnikov, Valerii" + }, + { + "name": "\u0160karvada, Jaroslav" + }, + { + "name": "Aang23" + }, + { + "name": "AlexandreRouma" + }, + { + "name": "Andrew" + }, + { + "name": "Zlika" + }, + { + "name": "luz.paz" + }, + { + "name": "rear1019" + } + ] +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..53d922b --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,404 @@ +# +# Copyright 2011-2020 Free Software Foundation, Inc. +# +# This file is part of VOLK +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +######################################################################## +# Project setup +######################################################################## +cmake_minimum_required(VERSION 3.8) +set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None Debug Release RelWithDebInfo MinSizeRel") +project(volk) + +enable_language(CXX) +enable_language(C) + +set(CMAKE_C_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) + +enable_testing() + + +######################################################################## +# Common compile flags +######################################################################## + +# Disable complex math NaN/INFO range checking for performance +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag(-fcx-limited-range HAVE_CX_LIMITED_RANGE) +if(HAVE_CX_LIMITED_RANGE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcx-limited-range") +endif(HAVE_CX_LIMITED_RANGE) + +include(CheckCCompilerFlag) +check_c_compiler_flag(-fcx-limited-range HAVE_C_LIMITED_RANGE) +if(HAVE_C_LIMITED_RANGE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fcx-limited-range") +endif(HAVE_C_LIMITED_RANGE) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) + +if(CMAKE_C_COMPILER_ID MATCHES "Clang|GNU") + # Abort compilation if kernel implementations have inconsistent function + # prototypes, i.e. if + # + # kernel_foo_sse(uint32_t *dst, lv32fc_t *src) + # kernel_foo_avx(uint16_t *dst, lv32fc_t *src) + # + # are defined. Note the different data type of the first argument). By + # default 'incompatible-pointer-types' is a warning only and 'pointer-sign' + # is a warning enabled by '-Wall'. These warnings are only applicable to C. + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=incompatible-pointer-types -Werror=pointer-sign") +endif() + +set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) #allows this to be a sub-project +set(CMAKE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) #allows this to be a sub-project +list(INSERT CMAKE_MODULE_PATH 0 ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules) #location for custom "Modules" + +include(VolkBuildTypes) +#select the release build type by default to get optimization flags +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") + message(STATUS "Build type not specified: defaulting to release.") +endif() +VOLK_CHECK_BUILD_TYPE(${CMAKE_BUILD_TYPE}) +set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "") +message(STATUS "Build type set to ${CMAKE_BUILD_TYPE}.") + +######################################################################## +# Version setup +######################################################################## + +set(VERSION_INFO_MAJOR_VERSION 2) +set(VERSION_INFO_MINOR_VERSION 5) +set(VERSION_INFO_MAINT_VERSION 2) +include(VolkVersion) #setup version info + +macro(set_version_str VAR) + set(IN_VER ${VERSION_INFO_${VAR}_VERSION}) + string(LENGTH "${IN_VER}" VER_LEN) + if(${VER_LEN} EQUAL 1) + set(VOLK_VERSION_${VAR} "0${IN_VER}") + else() + set(VOLK_VERSION_${VAR} "${IN_VER}") + endif() +endmacro() + +set_version_str(MAJOR) +set_version_str(MINOR) +set_version_str(MAINT) + +configure_file( + ${CMAKE_SOURCE_DIR}/include/volk/volk_version.h.in + ${CMAKE_BINARY_DIR}/include/volk/volk_version.h +@ONLY) + +######################################################################## +# Environment setup +######################################################################## +IF(NOT DEFINED CROSSCOMPILE_MULTILIB) + SET(CROSSCOMPILE_MULTILIB "") +ENDIF() +SET(CROSSCOMPILE_MULTILIB ${CROSSCOMPILE_MULTILIB} CACHE STRING "Define \"true\" if you have and want to use multiple C development libs installed for cross compile") + +if(MSVC) + add_definitions(-D_USE_MATH_DEFINES) #enables math constants on all supported versions of MSVC + add_compile_options(/W1) #reduce warnings + add_compile_options(/wo4309) + add_compile_options(/wd4752) + add_compile_options(/wo4273) + add_compile_options(/wo4838) +endif(MSVC) + +######################################################################## +# Dependencies setup +######################################################################## + +# cpu_features - sensible defaults, user settable option +if(CMAKE_SYSTEM_PROCESSOR MATCHES + "(^mips)|(^arm)|(^aarch64)|(x86_64)|(AMD64|amd64)|(^i.86$)|(^powerpc)|(^ppc)") + option(VOLK_CPU_FEATURES "Volk uses cpu_features" ON) +else() + option(VOLK_CPU_FEATURES "Volk uses cpu_features" OFF) +endif() + +if (VOLK_CPU_FEATURES) + find_package(CpuFeatures QUIET) + if(NOT CpuFeatures_FOUND) + message(STATUS "cpu_features package not found. Requiring cpu_features submodule ...") + if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/cpu_features/CMakeLists.txt" ) + message(FATAL_ERROR "cpu_features/CMakeLists.txt not found. Did you forget to git clone recursively?\nFix with: git submodule update --init") + endif() + message(STATUS "Building Volk with cpu_features") + set(BUILD_TESTING OFF CACHE BOOL "Build cpu_features without tests." FORCE) + set(BUILD_PIC ON CACHE BOOL + "Build cpu_features with Position Independent Code (PIC)." + FORCE) + set(CMAKE_POSITION_INDEPENDENT_CODE ON CACHE BOOL + "Build cpu_features with Position Independent Code (PIC)." + FORCE) + set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}") + set(BUILD_SHARED_LIBS OFF) + add_subdirectory(cpu_features) + set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}") + endif() +else() + message(STATUS "Building Volk without cpu_features") +endif() + +# Python +include(VolkPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B +VOLK_PYTHON_CHECK_MODULE("python >= 3.4" sys "sys.version_info >= (3, 4)" PYTHON_MIN_VER_FOUND) +VOLK_PYTHON_CHECK_MODULE("mako >= 0.4.2" mako "mako.__version__ >= '0.4.2'" MAKO_FOUND) + +if(NOT PYTHON_MIN_VER_FOUND) + message(FATAL_ERROR "Python 3.4 or greater required to build VOLK") +endif() + +# Mako +if(NOT MAKO_FOUND) + message(FATAL_ERROR "Mako templates required to build VOLK") +endif() + +# Check if we have std::filesystem +find_package(FILESYSTEM COMPONENTS Final Experimental REQUIRED) + +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + + +######################################################################## +# check for aligned_alloc, since some compilers lack this C11 feature. +# For Apple-clang use `posix_memalign` +# For MSVC use `_aligned_malloc`. +######################################################################## +include(CheckSymbolExists) +if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")) + CHECK_SYMBOL_EXISTS(aligned_alloc stdlib.h USE_ALIGNED_ALLOC) +endif() +if(NOT USE_ALIGNED_ALLOC) + CHECK_SYMBOL_EXISTS(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN) +endif() + +######################################################################## +# Check if Orc is available +######################################################################## +option(ENABLE_ORC "Enable Orc" True) +if(ENABLE_ORC) + find_package(ORC) +else(ENABLE_ORC) + message(STATUS "Disabling use of ORC") +endif(ENABLE_ORC) + +######################################################################## +# Setup doxygen +######################################################################## +add_subdirectory(docs) + +######################################################################## +# Detect /lib versus /lib64 +######################################################################## +if (${CMAKE_INSTALL_LIBDIR} MATCHES lib64) + set(LIB_SUFFIX 64) +endif() + +######################################################################## +# Setup the package config file +######################################################################## +#set variables found in the pc.in file +set(prefix ${CMAKE_INSTALL_PREFIX}) +set(exec_prefix "\${prefix}") +set(libdir "\${exec_prefix}/lib${LIB_SUFFIX}") +set(includedir "\${prefix}/include") + +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/tmpl/volk.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/volk.pc +@ONLY) + +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/volk.pc + DESTINATION lib${LIB_SUFFIX}/pkgconfig + COMPONENT "volk_devel" +) + +######################################################################## +# Install all headers in the include directories +######################################################################## +set(VOLK_RUNTIME_DIR bin) +set(VOLK_LIBRARY_DIR lib${LIB_SUFFIX}) +set(VOLK_INCLUDE_DIR include) + +install( + DIRECTORY ${CMAKE_SOURCE_DIR}/kernels/volk + DESTINATION include COMPONENT "volk_devel" + FILES_MATCHING PATTERN "*.h" +) + +install(FILES + ${CMAKE_SOURCE_DIR}/include/volk/volk_prefs.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_alloc.hh + ${CMAKE_SOURCE_DIR}/include/volk/volk_complex.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_common.h + ${CMAKE_SOURCE_DIR}/include/volk/saturation_arithmetic.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_avx_intrinsics.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_avx2_intrinsics.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_sse_intrinsics.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_sse3_intrinsics.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_neon_intrinsics.h + ${CMAKE_BINARY_DIR}/include/volk/volk.h + ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h + ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h + ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h + ${CMAKE_SOURCE_DIR}/include/volk/volk_malloc.h + ${CMAKE_BINARY_DIR}/include/volk/volk_version.h + ${CMAKE_SOURCE_DIR}/include/volk/constants.h + DESTINATION include/volk + COMPONENT "volk_devel" +) + +######################################################################## +# On Apple only, set install name and use rpath correctly, if not already set +######################################################################## +if(APPLE) + if(NOT CMAKE_INSTALL_NAME_DIR) + set(CMAKE_INSTALL_NAME_DIR + ${CMAKE_INSTALL_PREFIX}/${VOLK_LIBRARY_DIR} CACHE + PATH "Library Install Name Destination Directory" FORCE) + endif(NOT CMAKE_INSTALL_NAME_DIR) + if(NOT CMAKE_INSTALL_RPATH) + set(CMAKE_INSTALL_RPATH + ${CMAKE_INSTALL_PREFIX}/${VOLK_LIBRARY_DIR} CACHE + PATH "Library Install RPath" FORCE) + endif(NOT CMAKE_INSTALL_RPATH) + if(NOT CMAKE_BUILD_WITH_INSTALL_RPATH) + set(CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE + BOOL "Do Build Using Library Install RPath" FORCE) + endif(NOT CMAKE_BUILD_WITH_INSTALL_RPATH) +endif(APPLE) + +######################################################################## +# Create uninstall target +######################################################################## +configure_file( + ${CMAKE_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake +@ONLY) + +# Only add the target if there isn't one defined already +if(NOT TARGET uninstall) + add_custom_target(uninstall + ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake + ) +endif() + + +######################################################################## +# Install our Cmake modules into $prefix/lib/cmake/volk +# See "Package Configuration Files" on page: +# http://www.cmake.org/Wiki/CMake/Tutorials/Packaging +######################################################################## + +configure_file( + ${CMAKE_SOURCE_DIR}/cmake/Modules/VolkConfig.cmake.in + ${CMAKE_BINARY_DIR}/cmake/Modules/VolkConfig.cmake +@ONLY) + +configure_file( + ${CMAKE_SOURCE_DIR}/cmake/Modules/VolkConfigVersion.cmake.in + ${CMAKE_BINARY_DIR}/cmake/Modules/VolkConfigVersion.cmake +@ONLY) + + +######################################################################## +# Install cmake search routine for external use +######################################################################## + +if(NOT CMAKE_MODULES_DIR) + set(CMAKE_MODULES_DIR lib${LIB_SUFFIX}/cmake) +endif(NOT CMAKE_MODULES_DIR) + +install( + FILES + ${CMAKE_CURRENT_BINARY_DIR}/cmake/Modules/VolkConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/cmake/Modules/VolkConfigVersion.cmake + DESTINATION ${CMAKE_MODULES_DIR}/volk + COMPONENT "volk_devel" ) + +install(EXPORT VOLK-export FILE VolkTargets.cmake + NAMESPACE Volk:: DESTINATION ${CMAKE_MODULES_DIR}/volk ) + +######################################################################## +# Option to enable QA testing, on by default +######################################################################## +OPTION(ENABLE_TESTING "Enable QA testing" ON) +if(ENABLE_TESTING) + message(STATUS "QA Testing is enabled.") +else() + message(STATUS "QA Testing is disabled.") +endif() +message(STATUS " Modify using: -DENABLE_TESTING=ON/OFF") + +######################################################################## +# Option to enable post-build profiling using volk_profile, off by default +######################################################################## +OPTION(ENABLE_PROFILING "Launch system profiler after build" OFF) +if(ENABLE_PROFILING) + if(DEFINED VOLK_CONFIGPATH) + get_filename_component(VOLK_CONFIGPATH ${VOLK_CONFIGPATH} ABSOLUTE) + set(VOLK_CONFIGPATH "${VOLK_CONFIGPATH}/volk") + message(STATUS "System profiling is enabled, using path: ${VOLK_CONFIGPATH}") + elseif(DEFINED ENV{VOLK_CONFIGPATH}) + set(VOLK_CONFIGPATH "$ENV{VOLK_CONFIGPATH}/volk") + message(STATUS "System profiling is enabled, using env path: $ENV{VOLK_CONFIGPATH}") + else() + message(STATUS "System profiling is enabled with default paths.") + if(DEFINED ENV{HOME}) + set(VOLK_CONFIGPATH "$ENV{HOME}/.volk") + elseif(DEFINED ENV{APPDATA}) + set(VOLK_CONFIGPATH "$ENV{APPDATA}/.volk") + endif() + endif() +else() + message(STATUS "System profiling is disabled.") +endif() +message(STATUS " Modify using: -DENABLE_PROFILING=ON/OFF") + +######################################################################## +# Setup the library +######################################################################## +add_subdirectory(lib) + +######################################################################## +# And the utility apps +######################################################################## +add_subdirectory(apps) +option(ENABLE_MODTOOL "Enable volk_modtool python utility" True) +if(ENABLE_MODTOOL) + add_subdirectory(python/volk_modtool) +endif() + +######################################################################## +# And the LGPL license check +######################################################################## +# detect default for LGPL +if(VERSION_INFO_MAJOR_VERSION GREATER_EQUAL 3) + OPTION(ENABLE_LGPL "Enable testing for LGPL" ON) +else() + OPTION(ENABLE_LGPL "Enable testing for LGPL" OFF) +endif() + +if(ENABLE_TESTING AND ENABLE_LGPL) + message(STATUS "Checking for LGPL resubmission enabled") + add_test("check_lgpl" ${CMAKE_SOURCE_DIR}/scripts/licensing/count_contrib.sh ${CMAKE_SOURCE_DIR}/AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md) +endif() + +######################################################################## +# Print summary +######################################################################## +message(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}") diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..edfc42b --- /dev/null +++ b/COPYING @@ -0,0 +1,684 @@ +Files in this code repository are to be licensed under the LGPL-3.0-or-later, +which you will find in the file COPYING-LGPL. + +However, you'll find some files that carry a license header that assigns them as +GPL-3.0-or-later, which was the default license for VOLK before version 3. You can find the +full license text of the GPL-3.0-or-later below. + +================================================================================ + + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/COPYING-GPL b/COPYING-GPL new file mode 120000 index 0000000..d24842f --- /dev/null +++ b/COPYING-GPL @@ -0,0 +1 @@ +COPYING \ No newline at end of file diff --git a/COPYING-LGPL b/COPYING-LGPL new file mode 100644 index 0000000..a72ca0d --- /dev/null +++ b/COPYING-LGPL @@ -0,0 +1,175 @@ +Files in this code repository are to be licensed under the LGPL-3.0-or-later, +which you'll find below. + +However, you'll find some files that carry a license header that assigns them as +GPL-3.0-or-later, which was the default license for VOLK up to version 3. You +can find the full license text of the GPL-3.0-or-later in the file COPYING-GPL. + +================================================================================ + + + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README.md b/README.md new file mode 100644 index 0000000..9a908b4 --- /dev/null +++ b/README.md @@ -0,0 +1,122 @@ +[![Build Status](https://travis-ci.com/gnuradio/volk.svg?branch=master)](https://travis-ci.com/gnuradio/volk) [![Build status](https://ci.appveyor.com/api/projects/status/5o56mgw0do20jlh3/branch/master?svg=true)](https://ci.appveyor.com/project/gnuradio/volk/branch/master) +![Check PR Formatting](https://github.com/gnuradio/volk/workflows/Check%20PR%20Formatting/badge.svg) +![Run VOLK tests](https://github.com/gnuradio/volk/workflows/Run%20VOLK%20tests/badge.svg) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3360942.svg)](https://doi.org/10.5281/zenodo.3360942) + +![VOLK Logo](/docs/volk_logo.png) + +# Welcome to VOLK! + +VOLK is a sub-project of GNU Radio. Please see http://libvolk.org for bug +tracking, documentation, source code, and contact information about VOLK. +See https://www.gnuradio.org/ for information about GNU Radio. + +VOLK is the Vector-Optimized Library of Kernels. It is a library that contains kernels of hand-written SIMD code for different mathematical operations. Since each SIMD architecture can be very different and no compiler has yet come along to handle vectorization properly or highly efficiently, VOLK approaches the problem differently. + +For each architecture or platform that a developer wishes to vectorize for, a new proto-kernel is added to VOLK. At runtime, VOLK will select the correct proto-kernel. In this way, the users of VOLK call a kernel for performing the operation that is platform/architecture agnostic. This allows us to write portable SIMD code. + +Bleeding edge code can be found in our git repository at +https://www.gnuradio.org/git/volk.git/. + +## How to use VOLK: + +For detailed instructions see http://libvolk.org/doxygen/using_volk.html + +See these steps for a quick build guide. + +### Building on most x86 (32-bit and 64-bit) platforms + +```bash +$ mkdir build +$ cd build +$ cmake .. +$ make +$ make test +$ sudo make install + +# You might want to explore "make -j[SOMEVALUE]" options for your multicore CPU. + +# Perform post-installation steps + +# Linux OS: Link and cache shared library +$ sudo ldconfig + +# macOS/Windows: Update PATH environment variable to point to lib install location + +# volk_profile will profile your system so that the best kernel is used +$ volk_profile +``` + +#### Missing submodule +We use [cpu_features](https://github.com/google/cpu_features) as a git submodule to detect CPU features, e.g. AVX. +There are two options to get the required code: +```bash +git clone --recursive https://github.com/gnuradio/volk.git +``` +will automatically clone submodules as well. +In case you missed that, you can just run: +```bash +git submodule update --init --recursive +``` +that'll pull in missing submodules. + + +### Building on Raspberry Pi and other ARM boards (32 bit) + +To build for these boards you need specify the correct cmake toolchain file for best performance. + +_Note: There is no need for adding extra options to the compiler for 64 bit._ + +* Raspberry Pi 4 `arm_cortex_a72_hardfp_native.cmake` +* Raspberry Pi 3 `arm_cortex_a53_hardfp_native.cmake` + +```bash +$ mkdir build && cd build +$ cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/arm_cortex_a72_hardfp_native.cmake .. +# make -j4 might be faster +$ make +$ make test +$ sudo make install + +# volk_profile will profile your system so that the best kernel is used +$ volk_profile +``` + +## Code of Conduct +We want to make sure everyone feels welcome. Thus, we follow our [Code of Conduct](docs/CODE_OF_CONDUCT.md). + +## Contributing +We are happy to accept contributions. [Please refer to our contributing guide for further details](docs/CONTRIBUTING.md). +Also, make sure to read the [Developer's Certificate of Origin](docs/DCO.txt) and make sure to sign every commit with `git commit -s`. + +## Releases and development +We maintain a [CHANGELOG](docs/CHANGELOG.md) for every release. Please refer to this file for more detailed information. +We follow semantic versioning as outlined in [our versioning guide](docs/versioning.md). + +## Supported platforms +VOLK aims to be portable to as many platforms as possible. We can only run tests on some platforms. + +### Hardware architectures +Currently VOLK aims to run with optimized kernels on x86 with SSE/AVX and ARM with NEON. + +### OS / Distro +We run tests on a variety of Ubuntu versions and aim to support as many current distros as possible. +The same goal applies to different OSes. Although this does only rarely happen, it might occur that VOLK does not work on obsolete distros, e.g. Ubuntu 12.04. + +### Compilers +We want to make sure VOLK works with C/C++ standard compliant compilers. Of course, as an open source project we focus on open source compilers, most notably GCC and Clang. +We want to make sure VOLK compiles on a wide variety of compilers. Thus, we target AppleClang and MSVC as well. Mind that MSVC lacks `aligned_alloc` support for aligned arrays. We use MSVC specific instructions in this case which cannot be `free`'d with `free`. + + +## License + +VOLK is migrating from the GNU General Public License v3.0 or later (GPL-3.0-or-later) +to the GNU Lesser General Public License v3.0 or later (LGPL-3.0-or-later). +At this point in time, much of the code in the repository is still GPL-licensed. +New contributors are required to use the LGPLv3+ for their code contributions. +Existing contributors are very kindly requested to re-submit their GPL-3.0-or-later license code contributions to LGPL-3.0-or-later by +adding their name, GitHub handle, and email address(es) used for VOLK commits +to the file [AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md](docs/AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md). + +### Re-licensing status +VOLK is ready for a VOLK 3.0 release which marks our final switch to LGPL. diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt new file mode 100644 index 0000000..8ac9187 --- /dev/null +++ b/apps/CMakeLists.txt @@ -0,0 +1,91 @@ +# +# Copyright 2011-2013 Free Software Foundation, Inc. +# +# This file is part of VOLK +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +######################################################################## +# Setup profiler +######################################################################## + +# POSIX_MEMALIGN: If we have to fall back to `posix_memalign`. +if(HAVE_POSIX_MEMALIGN) + message(STATUS "Use `posix_memalign` for aligned malloc!") + add_definitions(-DHAVE_POSIX_MEMALIGN) +endif(HAVE_POSIX_MEMALIGN) + +# MAKE volk_profile +add_executable(volk_profile + ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc + ${PROJECT_SOURCE_DIR}/lib/qa_utils.cc + ${CMAKE_CURRENT_SOURCE_DIR}/volk_option_helpers.cc +) + +if(MSVC) + target_include_directories(volk_profile + PRIVATE $ + ) +endif(MSVC) + +target_include_directories(volk_profile + PRIVATE $ + PRIVATE $ + PRIVATE $ + PRIVATE $ + PRIVATE ${CMAKE_CURRENT_BINARY_DIR} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} +) + +add_definitions(-DHAS_STD_FILESYSTEM=1) +if(${find_experimental}) + add_definitions(-DHAS_STD_FILESYSTEM_EXPERIMENTAL=1) +endif() +target_link_libraries(volk_profile PRIVATE std::filesystem) + +if(ENABLE_STATIC_LIBS) + target_link_libraries(volk_profile PRIVATE volk_static) + set_target_properties(volk_profile PROPERTIES LINK_FLAGS "-static") +else() + target_link_libraries(volk_profile PRIVATE volk) +endif() + +install( + TARGETS volk_profile + DESTINATION bin + COMPONENT "volk" +) + +# MAKE volk-config-info +add_executable(volk-config-info volk-config-info.cc ${CMAKE_CURRENT_SOURCE_DIR}/volk_option_helpers.cc + ) + +if(ENABLE_STATIC_LIBS) + target_link_libraries(volk-config-info volk_static) + set_target_properties(volk-config-info PROPERTIES LINK_FLAGS "-static") +else() + target_link_libraries(volk-config-info volk) +endif() + +install( + TARGETS volk-config-info + DESTINATION bin + COMPONENT "volk" +) + +# Launch volk_profile if requested to do so +if(ENABLE_PROFILING) + if(DEFINED VOLK_CONFIGPATH) + set( VOLK_CONFIG_ARG "-p${VOLK_CONFIGPATH}" ) + set( VOLK_CONFIG "${VOLK_CONFIGPATH}/volk_config" ) + endif() + + add_custom_command(OUTPUT ${VOLK_CONFIG} + COMMAND volk_profile "${VOLK_CONFIG_ARG}" + DEPENDS volk_profile + COMMENT "Launching profiler, this may take a few minutes..." + ) + add_custom_target(volk-profile-run ALL DEPENDS ${VOLK_CONFIG}) + +endif() diff --git a/apps/plot_best_vs_generic.py b/apps/plot_best_vs_generic.py new file mode 100755 index 0000000..6e8a6e4 --- /dev/null +++ b/apps/plot_best_vs_generic.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# Copyright 2019 Free Software Foundation, Inc. +# +# This file is part of VOLK +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +# This script is used to compare the generic kernels to the highest performing kernel, for each operation +# Run: +# ./volk_profile -j volk_results.json +# Then run this script under python3 + +import matplotlib.pyplot as plt +import numpy as np +import json + +filename = 'volk_results.json' + +operations = [] +metrics = [] +with open(filename) as json_file: + data = json.load(json_file) + for test in data['volk_tests']: + if ('generic' in test['results']) or ('u_generic' in test['results']): # some dont have a generic kernel + operations.append(test['name'][5:]) # remove volk_ prefix that they all have + extension_performance = [] + for key, val in test['results'].items(): + if key not in ['generic', 'u_generic']: # exclude generic results, when trying to find fastest time + extension_performance.append(val['time']) + try: + generic_time = test['results']['generic']['time'] + except: + generic_time = test['results']['u_generic']['time'] + metrics.append(extension_performance[np.argmin(extension_performance)]/generic_time) + + +plt.bar(np.arange(len(metrics)), metrics) +plt.hlines(1.0, -1, len(metrics), colors='r', linestyles='dashed') +plt.axis([-1, len(metrics), 0, 2]) +plt.xticks(np.arange(len(operations)), operations, rotation=90) +plt.ylabel('Time taken of fastest kernel relative to generic kernel') +plt.tight_layout() +plt.show() diff --git a/apps/volk-config-info.cc b/apps/volk-config-info.cc new file mode 100644 index 0000000..1bef49e --- /dev/null +++ b/apps/volk-config-info.cc @@ -0,0 +1,73 @@ +/* -*- c++ -*- */ +/* + * Copyright 2013, 2016, 2018 Free Software Foundation, Inc. + * + * This file is part of VOLK + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#if HAVE_CONFIG_H +#include +#endif + +#include // for volk_available_machines, volk_c_com... +#include // for operator<<, endl, cout, ostream +#include // for string + +#include "volk/volk.h" // for volk_get_alignment, volk_get_machine +#include "volk_option_helpers.h" // for option_list, option_t + +void print_alignment() +{ + std::cout << "Alignment in bytes: " << volk_get_alignment() << std::endl; +} + +void print_malloc() +{ + // You don't want to change the volk_malloc code, so just copy the if/else + // structure from there and give an explanation for the implementations + std::cout << "Used malloc implementation: "; +#if HAVE_POSIX_MEMALIGN + std::cout << "posix_memalign" << std::endl; +#elif defined(_MSC_VER) + std::cout << "_aligned_malloc" << std::endl; +#else + std::cout << "C11 aligned_alloc" << std::endl; +#endif +} + + +int main(int argc, char** argv) +{ + + option_list our_options("volk-config-info"); + our_options.add( + option_t("prefix", "", "print the VOLK installation prefix", volk_prefix())); + our_options.add( + option_t("cc", "", "print the VOLK C compiler version", volk_c_compiler())); + our_options.add( + option_t("cflags", "", "print the VOLK CFLAGS", volk_compiler_flags())); + our_options.add(option_t( + "all-machines", "", "print VOLK machines built", volk_available_machines())); + our_options.add(option_t("avail-machines", + "", + "print VOLK machines on the current " + "platform", + volk_list_machines)); + our_options.add(option_t("machine", + "", + "print the current VOLK machine that will be used", + volk_get_machine())); + our_options.add( + option_t("alignment", "", "print the memory alignment", print_alignment)); + our_options.add(option_t("malloc", + "", + "print the malloc implementation used in volk_malloc", + print_malloc)); + our_options.add(option_t("version", "v", "print the VOLK version", volk_version())); + + our_options.parse(argc, argv); + + return 0; +} diff --git a/apps/volk_option_helpers.cc b/apps/volk_option_helpers.cc new file mode 100644 index 0000000..7027128 --- /dev/null +++ b/apps/volk_option_helpers.cc @@ -0,0 +1,241 @@ +/* -*- c++ -*- */ +/* + * Copyright 2018-2020 Free Software Foundation, Inc. + * + * This file is part of VOLK + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + + +#include "volk_option_helpers.h" + +#include // IWYU pragma: keep +#include // IWYU pragma: keep +#include // IWYU pragma: keep +#include // for exception +#include // for operator<<, endl, basic_ostream, cout, ostream +#include // for pair + +/* + * Option type + */ +option_t::option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)()) + : longform("--" + t_longform), + shortform("-" + t_shortform), + msg(t_msg), + callback(t_callback) +{ + option_type = VOID_CALLBACK; +} + +option_t::option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)(int)) + : longform("--" + t_longform), + shortform("-" + t_shortform), + msg(t_msg), + callback((void (*)())t_callback) +{ + option_type = INT_CALLBACK; +} + +option_t::option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)(float)) + : longform("--" + t_longform), + shortform("-" + t_shortform), + msg(t_msg), + callback((void (*)())t_callback) +{ + option_type = FLOAT_CALLBACK; +} + +option_t::option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)(bool)) + : longform("--" + t_longform), + shortform("-" + t_shortform), + msg(t_msg), + callback((void (*)())t_callback) +{ + option_type = BOOL_CALLBACK; +} + +option_t::option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)(std::string)) + : longform("--" + t_longform), + shortform("-" + t_shortform), + msg(t_msg), + callback((void (*)())t_callback) +{ + option_type = STRING_CALLBACK; +} + +option_t::option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + std::string t_printval) + : longform("--" + t_longform), + shortform("-" + t_shortform), + msg(t_msg), + printval(t_printval) +{ + option_type = STRING; +} + + +/* + * Option List + */ + +option_list::option_list(std::string program_name) : d_program_name(program_name) +{ + d_internal_list = std::vector(); +} + + +void option_list::add(option_t opt) { d_internal_list.push_back(opt); } + +void option_list::parse(int argc, char** argv) +{ + for (int arg_number = 0; arg_number < argc; ++arg_number) { + for (std::vector::iterator this_option = d_internal_list.begin(); + this_option != d_internal_list.end(); + this_option++) { + int int_val = INT_MIN; + if (this_option->longform == std::string(argv[arg_number]) || + this_option->shortform == std::string(argv[arg_number])) { + + if (d_present_options.count(this_option->longform) == 0) { + d_present_options.insert( + std::pair(this_option->longform, 1)); + } else { + d_present_options[this_option->longform] += 1; + } + switch (this_option->option_type) { + case VOID_CALLBACK: + this_option->callback(); + break; + case INT_CALLBACK: + try { + int_val = atoi(argv[++arg_number]); + ((void (*)(int))this_option->callback)(int_val); + } catch (std::exception& exc) { + std::cout << "An int option can only receive a number" + << std::endl; + throw std::exception(); + }; + break; + case FLOAT_CALLBACK: + try { + double double_val = atof(argv[++arg_number]); + ((void (*)(float))this_option->callback)(double_val); + } catch (std::exception& exc) { + std::cout << "A float option can only receive a number" + << std::endl; + throw std::exception(); + }; + break; + case BOOL_CALLBACK: + try { + if (arg_number == (argc - 1)) { // this is the last arg + int_val = 1; + } else { // sneak a look at the next arg since it's present + char* next_arg = argv[arg_number + 1]; + if ((strncmp(next_arg, "-", 1) == 0) || + (strncmp(next_arg, "--", 2) == 0)) { + // the next arg is actually an arg, the bool is just + // present, set to true + int_val = 1; + } else if (strncmp(next_arg, "true", 4) == 0) { + int_val = 1; + } else if (strncmp(next_arg, "false", 5) == 0) { + int_val = 0; + } else { + // we got a number or a string. + // convert it to a number and depend on the catch to + // report an error condition + int_val = (bool)atoi(argv[++arg_number]); + } + } + } catch (std::exception& e) { + int_val = INT_MIN; + }; + if (int_val == INT_MIN) { + std::cout + << "option: '" << argv[arg_number - 1] + << "' -> received an unknown value. Boolean " + "options should receive one of '0', '1', 'true', 'false'." + << std::endl; + throw std::exception(); + } else if (int_val) { + ((void (*)(bool))this_option->callback)(int_val); + } + break; + case STRING_CALLBACK: + try { + ((void (*)(std::string))this_option->callback)( + argv[++arg_number]); + } catch (std::exception& exc) { + throw std::exception(); + }; + case STRING: + std::cout << this_option->printval << std::endl; + break; + } + } + } + if (std::string("--help") == std::string(argv[arg_number]) || + std::string("-h") == std::string(argv[arg_number])) { + d_present_options.insert(std::pair("--help", 1)); + help(); + } + } +} + +bool option_list::present(std::string option_name) +{ + if (d_present_options.count("--" + option_name)) { + return true; + } else { + return false; + } +} + +void option_list::help() +{ + std::cout << d_program_name << std::endl; + std::cout << " -h [ --help ] \t\tdisplay this help message" << std::endl; + for (std::vector::iterator this_option = d_internal_list.begin(); + this_option != d_internal_list.end(); + this_option++) { + std::string help_line(" "); + if (this_option->shortform == "-") { + help_line += this_option->longform + " "; + } else { + help_line += this_option->shortform + " [ " + this_option->longform + " ]"; + } + + switch (help_line.size() / 8) { + case 0: + help_line += "\t"; + case 1: + help_line += "\t"; + case 2: + help_line += "\t"; + case 3: + help_line += "\t"; + } + help_line += this_option->msg; + std::cout << help_line << std::endl; + } +} diff --git a/apps/volk_option_helpers.h b/apps/volk_option_helpers.h new file mode 100644 index 0000000..fed5ba3 --- /dev/null +++ b/apps/volk_option_helpers.h @@ -0,0 +1,83 @@ +/* -*- c++ -*- */ +/* + * Copyright 2018-2020 Free Software Foundation, Inc. + * + * This file is part of VOLK + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#ifndef VOLK_VOLK_OPTION_HELPERS_H +#define VOLK_VOLK_OPTION_HELPERS_H + +#include +#include +#include +#include +#include + +typedef enum { + VOID_CALLBACK, + INT_CALLBACK, + BOOL_CALLBACK, + STRING_CALLBACK, + FLOAT_CALLBACK, + STRING, +} VOLK_OPTYPE; + +class option_t +{ +public: + option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)()); + option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)(int)); + option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)(float)); + option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)(bool)); + option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + void (*t_callback)(std::string)); + option_t(std::string t_longform, + std::string t_shortform, + std::string t_msg, + std::string t_printval); + + std::string longform; + std::string shortform; + std::string msg; + VOLK_OPTYPE option_type; + std::string printval; + void (*callback)(); +}; + +class option_list +{ +public: + option_list(std::string program_name); + bool present(std::string option_name); + + void add(option_t opt); + + void parse(int argc, char** argv); + + void help(); + +private: + std::string d_program_name; + std::vector d_internal_list; + std::map d_present_options; +}; + + +#endif // VOLK_VOLK_OPTION_HELPERS_H diff --git a/apps/volk_profile.cc b/apps/volk_profile.cc new file mode 100644 index 0000000..7241f20 --- /dev/null +++ b/apps/volk_profile.cc @@ -0,0 +1,335 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012-2014 Free Software Foundation, Inc. + * + * This file is part of VOLK + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#if HAS_STD_FILESYSTEM_EXPERIMENTAL +#include +#else +#include +#endif +#include // for size_t +#include // for stat +#include // for volk_get_config_path +#include // IWYU pragma: keep +#include // for operator<<, basic_ostream +#include // for map, map<>::iterator +#include // for pair +#include // for vector, vector<>::const_... + +#include "kernel_tests.h" // for init_test_list +#include "qa_utils.h" // for volk_test_results_t, vol... +#include "volk/volk_complex.h" // for lv_32fc_t +#include "volk_option_helpers.h" // for option_list, option_t +#include "volk_profile.h" + +#if HAS_STD_FILESYSTEM_EXPERIMENTAL +namespace fs = std::experimental::filesystem; +#else +namespace fs = std::filesystem; +#endif + +volk_test_params_t test_params(1e-6f, 327.f, 131071, 1987, false, ""); + +void set_benchmark(bool val) { test_params.set_benchmark(val); } +void set_tolerance(float val) { test_params.set_tol(val); } +void set_vlen(int val) { test_params.set_vlen((unsigned int)val); } +void set_iter(int val) { test_params.set_iter((unsigned int)val); } +void set_substr(std::string val) { test_params.set_regex(val); } +bool update_mode = false; +void set_update(bool val) { update_mode = val; } +bool dry_run = false; +void set_dryrun(bool val) { dry_run = val; } +std::string json_filename(""); +void set_json(std::string val) { json_filename = val; } +std::string volk_config_path(""); +void set_volk_config(std::string val) { volk_config_path = val; } + +int main(int argc, char* argv[]) +{ + + option_list profile_options("volk_profile"); + profile_options.add( + option_t("benchmark", "b", "Run all kernels (benchmark mode)", set_benchmark)); + profile_options.add( + option_t("tol", "t", "Set the default tolerance for all tests", set_tolerance)); + profile_options.add( + option_t("vlen", "v", "Set the default vector length for tests", set_vlen)); + profile_options.add((option_t( + "iter", "i", "Set the default number of test iterations per kernel", set_iter))); + profile_options.add( + (option_t("tests-substr", "R", "Run tests matching substring", set_substr))); + profile_options.add( + (option_t("update", "u", "Run only kernels missing from config", set_update))); + profile_options.add( + (option_t("dry-run", + "n", + "Dry run. Respect other options, but don't write to file", + set_dryrun))); + profile_options.add((option_t( + "json", "j", "Write results to JSON file named as argument value", set_json))); + profile_options.add( + (option_t("path", "p", "Specify the volk_config path", set_volk_config))); + profile_options.parse(argc, argv); + + if (profile_options.present("help")) { + return 0; + } + + if (dry_run) { + std::cout << "Warning: this IS a dry-run. Config will not be written!" + << std::endl; + } + + // Adding program options + std::ofstream json_file; + std::string config_file; + + if (json_filename != "") { + json_file.open(json_filename.c_str()); + } + + if (volk_config_path != "") { + config_file = volk_config_path + "/volk_config"; + } + + // Run tests + std::vector results; + if (update_mode) { + if (config_file != "") + read_results(&results, config_file); + else + read_results(&results); + } + + // Initialize the list of tests + std::vector test_cases = init_test_list(test_params); + + // Iterate through list of tests running each one + std::string substr_to_match(test_params.kernel_regex()); + for (unsigned int ii = 0; ii < test_cases.size(); ++ii) { + bool regex_match = true; + + volk_test_case_t test_case = test_cases[ii]; + // if the kernel name matches regex then do the test + std::string test_case_name = test_case.name(); + if (test_case_name.find(substr_to_match) == std::string::npos) { + regex_match = false; + } + + // if we are in update mode check if we've already got results + // if we have any, then no need to test that kernel + bool update = true; + if (update_mode) { + for (unsigned int jj = 0; jj < results.size(); ++jj) { + if (results[jj].name == test_case.name() || + results[jj].name == test_case.puppet_master_name()) { + update = false; + break; + } + } + } + + if (regex_match && update) { + try { + run_volk_tests(test_case.desc(), + test_case.kernel_ptr(), + test_case.name(), + test_case.test_parameters(), + &results, + test_case.puppet_master_name()); + } catch (std::string& error) { + std::cerr << "Caught Exception in 'run_volk_tests': " << error + << std::endl; + } + } + } + + + // Output results according to provided options + if (json_filename != "") { + write_json(json_file, results); + json_file.close(); + } + + if (!dry_run) { + if (config_file != "") + write_results(&results, false, config_file); + else + write_results(&results, false); + } else { + std::cout << "Warning: this was a dry-run. Config not generated" << std::endl; + } + return 0; +} + +void read_results(std::vector* results) +{ + char path[1024]; + volk_get_config_path(path, true); + if (path[0] == 0) { + std::cout << "No prior test results found ..." << std::endl; + return; + } + + read_results(results, std::string(path)); +} + +void read_results(std::vector* results, std::string path) +{ + struct stat buffer; + bool config_status = (stat(path.c_str(), &buffer) == 0); + + if (config_status) { + // a config exists and we are reading results from it + std::ifstream config(path.c_str()); + char config_line[256]; + while (config.getline(config_line, 255)) { + // tokenize the input line by kernel_name unaligned aligned + // then push back in the results vector with fields filled in + + std::vector single_kernel_result; + std::string config_str(config_line); + std::size_t str_size = config_str.size(); + std::size_t found = config_str.find(' '); + + // Split line by spaces + while (found && found < str_size) { + found = config_str.find(' '); + // kernel names MUST be less than 128 chars, which is + // a length restricted by volk/volk_prefs.c + // on the last token in the parsed string we won't find a space + // so make sure we copy at most 128 chars. + if (found > 127) { + found = 127; + } + str_size = config_str.size(); + char line_buffer[128] = { '\0' }; + config_str.copy(line_buffer, found + 1, 0); + line_buffer[found] = '\0'; + single_kernel_result.push_back(std::string(line_buffer)); + config_str.erase(0, found + 1); + } + + if (single_kernel_result.size() == 3) { + volk_test_results_t kernel_result; + kernel_result.name = std::string(single_kernel_result[0]); + kernel_result.config_name = std::string(single_kernel_result[0]); + kernel_result.best_arch_u = std::string(single_kernel_result[1]); + kernel_result.best_arch_a = std::string(single_kernel_result[2]); + results->push_back(kernel_result); + } + } + } +} + +void write_results(const std::vector* results, bool update_result) +{ + char path[1024]; + volk_get_config_path(path, false); + if (path[0] == 0) { + std::cout << "Aborting 'No config save path found' ..." << std::endl; + return; + } + + write_results(results, update_result, std::string(path)); +} + +void write_results(const std::vector* results, + bool update_result, + const std::string path) +{ + // struct stat buffer; + // bool config_status = (stat (path.c_str(), &buffer) == 0); + + /* + * These + */ + const fs::path config_path(path); + if (!fs::exists(config_path.parent_path())) { + std::cout << "Creating " << config_path.parent_path() << "..." << std::endl; + fs::create_directories(config_path.parent_path()); + } + + std::ofstream config; + if (update_result) { + std::cout << "Updating " << path << "..." << std::endl; + config.open(path.c_str(), std::ofstream::app); + if (!config.is_open()) { // either we don't have write access or we don't have the + // dir yet + std::cout << "Error opening file " << path << std::endl; + } + } else { + std::cout << "Writing " << path << "..." << std::endl; + config.open(path.c_str()); + if (!config.is_open()) { // either we don't have write access or we don't have the + // dir yet + std::cout << "Error opening file " << path << std::endl; + } + + config << "\ +#this file is generated by volk_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + } + + std::vector::const_iterator profile_results; + for (profile_results = results->begin(); profile_results != results->end(); + ++profile_results) { + config << profile_results->config_name << " " << profile_results->best_arch_a + << " " << profile_results->best_arch_u << std::endl; + } + config.close(); +} + +void write_json(std::ofstream& json_file, std::vector results) +{ + json_file << "{" << std::endl; + json_file << " \"volk_tests\": [" << std::endl; + size_t len = results.size(); + size_t i = 0; + std::vector::iterator result; + for (result = results.begin(); result != results.end(); ++result) { + json_file << " {" << std::endl; + json_file << " \"name\": \"" << result->name << "\"," << std::endl; + json_file << " \"vlen\": " << (int)(result->vlen) << "," << std::endl; + json_file << " \"iter\": " << result->iter << "," << std::endl; + json_file << " \"best_arch_a\": \"" << result->best_arch_a << "\"," + << std::endl; + json_file << " \"best_arch_u\": \"" << result->best_arch_u << "\"," + << std::endl; + json_file << " \"results\": {" << std::endl; + size_t results_len = result->results.size(); + size_t ri = 0; + + std::map::iterator kernel_time_pair; + for (kernel_time_pair = result->results.begin(); + kernel_time_pair != result->results.end(); + ++kernel_time_pair) { + volk_test_time_t time = kernel_time_pair->second; + json_file << " \"" << time.name << "\": {" << std::endl; + json_file << " \"name\": \"" << time.name << "\"," << std::endl; + json_file << " \"time\": " << time.time << "," << std::endl; + json_file << " \"units\": \"" << time.units << "\"" << std::endl; + json_file << " }"; + if (ri + 1 != results_len) { + json_file << ","; + } + json_file << std::endl; + ri++; + } + json_file << " }" << std::endl; + json_file << " }"; + if (i + 1 != len) { + json_file << ","; + } + json_file << std::endl; + i++; + } + json_file << " ]" << std::endl; + json_file << "}" << std::endl; +} diff --git a/apps/volk_profile.h b/apps/volk_profile.h new file mode 100644 index 0000000..b532485 --- /dev/null +++ b/apps/volk_profile.h @@ -0,0 +1,23 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012-2014 Free Software Foundation, Inc. + * + * This file is part of VOLK + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include // for bool +#include // for ofstream +#include // for string +#include // for vector + +class volk_test_results_t; + +void read_results(std::vector* results); +void read_results(std::vector* results, std::string path); +void write_results(const std::vector* results, bool update_result); +void write_results(const std::vector* results, + bool update_result, + const std::string path); +void write_json(std::ofstream& json_file, std::vector results); diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..4bbb7e7 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,38 @@ +clone_depth: 1 +image: Visual Studio 2019 +cache: + - packages -> appveyor.yml +environment: + environment: + matrix: + - job_name: VS 16 2019 / python 3.8 + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 + CMAKE_GENERATOR: Visual Studio 16 2019 + PYTHON: "C:\\Python38-x64" + +install: + # Prepend the selected Python to the PATH of this build + - SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% + # Display version information about selected python and pip + - python --version + - python -c "import sys, platform, struct; + print(sys.platform, platform.machine(), struct.calcsize('P')*8)" + - pip --version + - pip install mako +before_build: + - git submodule update --init --recursive + - cmake -G "%CMAKE_GENERATOR%" -A x64 \ + -DCMAKE_BUILD_TYPE:STRING=Release -DENABLE_ORC:BOOL=OFF -DENABLE_TESTING:BOOL=ON \ + . +build_script: + - cmake --build . --config Release --target INSTALL +test_script: + - ctest -V --output-on-failure -C Release +after_test: + - cd "c:\Program Files" + - 7z a "c:\libvolk-x64-%VC_VERSION%.zip" volk + - mkdir dlls + - cd dlls + - 7z a "c:\libvolk-x64-deps-%VC_VERSION%.zip" * + - appveyor PushArtifact c:\libvolk-x64-%VC_VERSION%.zip + - appveyor PushArtifact c:\libvolk-x64-deps-%VC_VERSION%.zip diff --git a/cmake/Modules/CMakeParseArgumentsCopy.cmake b/cmake/Modules/CMakeParseArgumentsCopy.cmake new file mode 100644 index 0000000..ab44d84 --- /dev/null +++ b/cmake/Modules/CMakeParseArgumentsCopy.cmake @@ -0,0 +1,145 @@ +# Copyright 2014, 2018 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +# CMAKE_PARSE_ARGUMENTS( args...) +# +# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions for +# parsing the arguments given to that macro or function. +# It processes the arguments and defines a set of variables which hold the +# values of the respective options. +# +# The argument contains all options for the respective macro, +# i.e. keywords which can be used when calling the macro without any value +# following, like e.g. the OPTIONAL keyword of the install() command. +# +# The argument contains all keywords for this macro +# which are followed by one value, like e.g. DESTINATION keyword of the +# install() command. +# +# The argument contains all keywords for this macro +# which can be followed by more than one value, like e.g. the TARGETS or +# FILES keywords of the install() command. +# +# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the +# keywords listed in , and +# a variable composed of the given +# followed by "_" and the name of the respective keyword. +# These variables will then hold the respective value from the argument list. +# For the keywords this will be TRUE or FALSE. +# +# All remaining arguments are collected in a variable +# _UNPARSED_ARGUMENTS, this can be checked afterwards to see whether +# your macro was called with unrecognized parameters. +# +# As an example here a my_install() macro, which takes similar arguments as the +# real install() command: +# +# function(MY_INSTALL) +# set(options OPTIONAL FAST) +# set(oneValueArgs DESTINATION RENAME) +# set(multiValueArgs TARGETS CONFIGURATIONS) +# cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) +# ... +# +# Assume my_install() has been called like this: +# my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub) +# +# After the cmake_parse_arguments() call the macro will have set the following +# variables: +# MY_INSTALL_OPTIONAL = TRUE +# MY_INSTALL_FAST = FALSE (this option was not used when calling my_install() +# MY_INSTALL_DESTINATION = "bin" +# MY_INSTALL_RENAME = "" (was not used) +# MY_INSTALL_TARGETS = "foo;bar" +# MY_INSTALL_CONFIGURATIONS = "" (was not used) +# MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL" +# +# You can the continue and process these variables. +# +# Keywords terminate lists of values, e.g. if directly after a one_value_keyword +# another recognized keyword follows, this is interpreted as the beginning of +# the new option. +# E.g. my_install(TARGETS foo DESTINATION OPTIONAL) would result in +# MY_INSTALL_DESTINATION set to "OPTIONAL", but MY_INSTALL_DESTINATION would +# be empty and MY_INSTALL_OPTIONAL would be set to TRUE therefore. + +#============================================================================= +# Copyright 2010 Alexander Neundorf +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + + +if(__CMAKE_PARSE_ARGUMENTS_INCLUDED) + return() +endif() +set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE) + + +function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames _multiArgNames) + # first set all result variables to empty/FALSE + foreach(arg_name ${_singleArgNames} ${_multiArgNames}) + set(${prefix}_${arg_name}) + endforeach(arg_name) + + foreach(option ${_optionNames}) + set(${prefix}_${option} FALSE) + endforeach(option) + + set(${prefix}_UNPARSED_ARGUMENTS) + + set(insideValues FALSE) + set(currentArgName) + + # now iterate over all arguments and fill the result variables + foreach(currentArg ${ARGN}) + list(FIND _optionNames "${currentArg}" optionIndex) # ... then this marks the end of the arguments belonging to this keyword + list(FIND _singleArgNames "${currentArg}" singleArgIndex) # ... then this marks the end of the arguments belonging to this keyword + list(FIND _multiArgNames "${currentArg}" multiArgIndex) # ... then this marks the end of the arguments belonging to this keyword + + if(${optionIndex} EQUAL -1 AND ${singleArgIndex} EQUAL -1 AND ${multiArgIndex} EQUAL -1) + if(insideValues) + if("${insideValues}" STREQUAL "SINGLE") + set(${prefix}_${currentArgName} ${currentArg}) + set(insideValues FALSE) + elseif("${insideValues}" STREQUAL "MULTI") + list(APPEND ${prefix}_${currentArgName} ${currentArg}) + endif() + else(insideValues) + list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg}) + endif(insideValues) + else() + if(NOT ${optionIndex} EQUAL -1) + set(${prefix}_${currentArg} TRUE) + set(insideValues FALSE) + elseif(NOT ${singleArgIndex} EQUAL -1) + set(currentArgName ${currentArg}) + set(${prefix}_${currentArgName}) + set(insideValues "SINGLE") + elseif(NOT ${multiArgIndex} EQUAL -1) + set(currentArgName ${currentArg}) + set(${prefix}_${currentArgName}) + set(insideValues "MULTI") + endif() + endif() + + endforeach(currentArg) + + # propagate the result variables to the caller: + foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames}) + set(${prefix}_${arg_name} ${${prefix}_${arg_name}} PARENT_SCOPE) + endforeach(arg_name) + set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE) + +endfunction(CMAKE_PARSE_ARGUMENTS _options _singleArgs _multiArgs) diff --git a/cmake/Modules/FindFILESYSTEM.cmake b/cmake/Modules/FindFILESYSTEM.cmake new file mode 100644 index 0000000..172a77c --- /dev/null +++ b/cmake/Modules/FindFILESYSTEM.cmake @@ -0,0 +1,261 @@ +# Copyright 2019 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +# Original code from https://github.com/vector-of-bool/CMakeCM and modified +# by C. Fernandez. The original code is distributed under the OSI-approved +# BSD 3-Clause License. See https://cmake.org/licensing for details. + +#[=======================================================================[.rst: + +FindFILESYSTEM +############## + +This module supports the C++17 standard library's filesystem utilities. Use the +:imp-target:`std::filesystem` imported target to + +Options +******* + +The ``COMPONENTS`` argument to this module supports the following values: + +.. find-component:: Experimental + :name: fs.Experimental + + Allows the module to find the "experimental" Filesystem TS version of the + Filesystem library. This is the library that should be used with the + ``std::experimental::filesystem`` namespace. + +.. find-component:: Final + :name: fs.Final + + Finds the final C++17 standard version of the filesystem library. + +If no components are provided, behaves as if the +:find-component:`fs.Final` component was specified. + +If both :find-component:`fs.Experimental` and :find-component:`fs.Final` are +provided, first looks for ``Final``, and falls back to ``Experimental`` in case +of failure. If ``Final`` is found, :imp-target:`std::filesystem` and all +:ref:`variables ` will refer to the ``Final`` version. + + +Imported Targets +**************** + +.. imp-target:: std::filesystem + + The ``std::filesystem`` imported target is defined when any requested + version of the C++ filesystem library has been found, whether it is + *Experimental* or *Final*. + + If no version of the filesystem library is available, this target will not + be defined. + + .. note:: + This target has ``cxx_std_17`` as an ``INTERFACE`` + :ref:`compile language standard feature `. Linking + to this target will automatically enable C++17 if no later standard + version is already required on the linking target. + + +.. _fs.variables: + +Variables +********* + +.. variable:: CXX_FILESYSTEM_IS_EXPERIMENTAL + + Set to ``TRUE`` when the :find-component:`fs.Experimental` version of C++ + filesystem library was found, otherwise ``FALSE``. + +.. variable:: CXX_FILESYSTEM_HAVE_FS + + Set to ``TRUE`` when a filesystem header was found. + +.. variable:: CXX_FILESYSTEM_HEADER + + Set to either ``filesystem`` or ``experimental/filesystem`` depending on + whether :find-component:`fs.Final` or :find-component:`fs.Experimental` was + found. + +.. variable:: CXX_FILESYSTEM_NAMESPACE + + Set to either ``std::filesystem`` or ``std::experimental::filesystem`` + depending on whether :find-component:`fs.Final` or + :find-component:`fs.Experimental` was found. + + +Examples +******** + +Using `find_package(FILESYSTEM)` with no component arguments: + +.. code-block:: cmake + + find_package(FILESYSTEM REQUIRED) + + add_executable(my-program main.cpp) + target_link_libraries(my-program PRIVATE std::filesystem) + + +#]=======================================================================] + + +if(TARGET std::filesystem) + # This module has already been processed. Don't do it again. + return() +endif() + +include(CMakePushCheckState) +include(CheckIncludeFileCXX) +include(CheckCXXSourceCompiles) + +cmake_push_check_state() + +set(CMAKE_REQUIRED_QUIET ${FILESYSTEM_FIND_QUIETLY}) + +# All of our tests require C++17 or later +set(OLD_CMAKE_CXX_STANDARD ${CMAKE_CXX_STANDARD}) +set(CMAKE_CXX_STANDARD 17) +if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "8.0.0")) + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.99") + set(UNDEFINED_BEHAVIOR_WITHOUT_LINKING TRUE) + endif() + set(CMAKE_REQUIRED_FLAGS "-std=c++17") +endif() +if((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.99")) + set(CMAKE_REQUIRED_FLAGS "-std=c++17") +endif() +if((CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "11")) + set(CMAKE_REQUIRED_FLAGS "-std=c++17") +endif() +if(MSVC AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "18")) + set(CMAKE_REQUIRED_FLAGS "/std:c++17") +endif() + +# Normalize and check the component list we were given +set(want_components ${FILESYSTEM_FIND_COMPONENTS}) +if(FILESYSTEM_FIND_COMPONENTS STREQUAL "") + set(want_components Final) +endif() + +# Warn on any unrecognized components +set(extra_components ${want_components}) +list(REMOVE_ITEM extra_components Final Experimental) +foreach(component IN LISTS extra_components) + message(WARNING "Extraneous find_package component for FILESYSTEM: ${component}") +endforeach() + +# Detect which of Experimental and Final we should look for +set(find_experimental TRUE) +set(find_final TRUE) +if(NOT "Final" IN_LIST want_components) + set(find_final FALSE) +endif() +if(NOT "Experimental" IN_LIST want_components) + set(find_experimental FALSE) +endif() + +if(find_final) + check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER) + mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER) + if(_CXX_FILESYSTEM_HAVE_HEADER) + # We found the non-experimental header. Don't bother looking for the + # experimental one. + set(find_experimental FALSE) + endif() +else() + set(_CXX_FILESYSTEM_HAVE_HEADER FALSE) +endif() + +if(find_experimental) + check_include_file_cxx("experimental/filesystem" _CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) + mark_as_advanced(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) +else() + set(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER FALSE) +endif() + +if(_CXX_FILESYSTEM_HAVE_HEADER) + set(_have_fs TRUE) + set(_fs_header filesystem) + set(_fs_namespace std::filesystem) +elseif(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) + set(_have_fs TRUE) + set(_fs_header experimental/filesystem) + set(_fs_namespace std::experimental::filesystem) +else() + set(_have_fs FALSE) +endif() + +set(CXX_FILESYSTEM_HAVE_FS ${_have_fs} CACHE BOOL "TRUE if we have the C++ filesystem headers") +set(CXX_FILESYSTEM_HEADER ${_fs_header} CACHE STRING "The header that should be included to obtain the filesystem APIs") +set(CXX_FILESYSTEM_NAMESPACE ${_fs_namespace} CACHE STRING "The C++ namespace that contains the filesystem APIs") + +set(_found FALSE) + +if(CXX_FILESYSTEM_HAVE_FS) + # We have some filesystem library available. Do link checks + string(CONFIGURE [[ + #include <@CXX_FILESYSTEM_HEADER@> + + int main() { + auto cwd = @CXX_FILESYSTEM_NAMESPACE@::current_path(); + return static_cast(cwd.string().size()); + } + ]] code @ONLY) + + # Try to compile a simple filesystem program without any linker flags + if(NOT UNDEFINED_BEHAVIOR_WITHOUT_LINKING) + check_cxx_source_compiles("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED) + set(can_link ${CXX_FILESYSTEM_NO_LINK_NEEDED}) + endif() + + if(NOT CXX_FILESYSTEM_NO_LINK_NEEDED) + set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES}) + set(CMAKE_REQUIRED_FLAGS "-std=c++17") + # Add the libstdc++ flag + set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs) + check_cxx_source_compiles("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED) + set(can_link ${CXX_FILESYSTEM_STDCPPFS_NEEDED}) + if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED) + # Try the libc++ flag + set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs) + check_cxx_source_compiles("${code}" CXX_FILESYSTEM_CPPFS_NEEDED) + set(can_link ${CXX_FILESYSTEM_CPPFS_NEEDED}) + endif() + endif() + + if(can_link) + if(CMAKE_VERSION VERSION_LESS 3.12) + add_library(std::filesystem INTERFACE IMPORTED GLOBAL) + else() + add_library(std::filesystem INTERFACE IMPORTED) + target_compile_features(std::filesystem INTERFACE cxx_std_17) + endif() + set(_found TRUE) + + if(CXX_FILESYSTEM_NO_LINK_NEEDED) + # Nothing to add... + elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED) + target_link_libraries(std::filesystem INTERFACE -lstdc++fs) + elseif(CXX_FILESYSTEM_CPPFS_NEEDED) + target_link_libraries(std::filesystem INTERFACE -lc++fs) + endif() + endif() +endif() + +if(NOT ${_found}) + set(CMAKE_CXX_STANDARD ${OLD_CMAKE_CXX_STANDARD}) +endif() + +cmake_pop_check_state() + +set(FILESYSTEM_FOUND ${_found} CACHE BOOL "TRUE if we can compile and link a program using std::filesystem" FORCE) + +if(FILESYSTEM_FIND_REQUIRED AND NOT FILESYSTEM_FOUND) + message(FATAL_ERROR "Cannot compile a simple program using std::filesystem") +endif() diff --git a/cmake/Modules/FindORC.cmake b/cmake/Modules/FindORC.cmake new file mode 100644 index 0000000..c5e6172 --- /dev/null +++ b/cmake/Modules/FindORC.cmake @@ -0,0 +1,49 @@ +# Copyright 2014, 2019, 2020 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +FIND_PACKAGE(PkgConfig) +PKG_CHECK_MODULES(PC_ORC "orc-0.4 > 0.4.11") + + + + +FIND_PROGRAM(ORCC_EXECUTABLE orcc + HINTS ${PC_ORC_TOOLSDIR} + PATHS ${ORC_ROOT}/bin ${CMAKE_INSTALL_PREFIX}/bin) + +FIND_PATH(ORC_INCLUDE_DIR NAMES orc/orc.h + HINTS ${PC_ORC_INCLUDEDIR} + PATHS ${ORC_ROOT}/include ${CMAKE_INSTALL_PREFIX}/include + PATH_SUFFIXES orc-0.4) + + +FIND_PATH(ORC_LIBRARY_DIR NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}orc-0.4${CMAKE_SHARED_LIBRARY_SUFFIX} + HINTS ${PC_ORC_LIBDIR} + PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}) + +FIND_LIBRARY(ORC_LIB orc-0.4 + HINTS ${PC_ORC_LIBRARY_DIRS} + PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}) + +FIND_LIBRARY(ORC_LIBRARY_STATIC liborc-0.4.a + HINTS ${PC_ORC_LIBRARY_DIRS} + PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}) + +LIST(APPEND ORC_LIBRARY + ${ORC_LIB} +) + + +SET(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR}) +SET(ORC_LIBRARIES ${ORC_LIBRARY}) +SET(ORC_LIBRARY_DIRS ${ORC_LIBRARY_DIR}) +SET(ORC_LIBRARIES_STATIC ${ORC_LIBRARY_STATIC}) + +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(ORC "orc files" ORC_LIBRARY ORC_INCLUDE_DIR ORCC_EXECUTABLE) + +mark_as_advanced(ORC_INCLUDE_DIR ORC_LIBRARY ORCC_EXECUTABLE) diff --git a/cmake/Modules/VolkAddTest.cmake b/cmake/Modules/VolkAddTest.cmake new file mode 100644 index 0000000..9aba67d --- /dev/null +++ b/cmake/Modules/VolkAddTest.cmake @@ -0,0 +1,208 @@ +# Copyright 2015 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +if(DEFINED __INCLUDED_VOLK_ADD_TEST) + return() +endif() +set(__INCLUDED_VOLK_ADD_TEST TRUE) + +######################################################################## +# Generate a test executable which can be used in ADD_TEST to call +# various subtests. +# +# SOURCES - sources for the test +# TARGET_DEPS - build target dependencies (e.g., libraries) +######################################################################## + +function(VOLK_GEN_TEST executable_name) + include(CMakeParseArgumentsCopy) + CMAKE_PARSE_ARGUMENTS(VOLK_TEST "" "" "SOURCES;TARGET_DEPS;EXTRA_LIB_DIRS;ENVIRONS;ARGS" ${ARGN}) + add_executable(${executable_name} ${VOLK_TEST_SOURCES}) + target_link_libraries(${executable_name} ${VOLK_TEST_TARGET_DEPS}) +endfunction() + +######################################################################## +# Add a unit test and setup the environment for it. +# Encloses ADD_TEST, with additional functionality to create a shell +# script that sets the environment to gain access to in-build binaries +# properly. The following variables are used to pass in settings: +# A test executable has to be generated with VOLK_GEN_TEST beforehand. +# The executable name has to be passed as argument. +# +# NAME - the test name +# TARGET_DEPS - build target dependencies (e.g., libraries) +# EXTRA_LIB_DIRS - other directories for the library path +# ENVIRONS - other environment key/value pairs +# ARGS - arguments for the test +######################################################################## +function(VOLK_ADD_TEST test_name executable_name) + + #parse the arguments for component names + include(CMakeParseArgumentsCopy) + CMAKE_PARSE_ARGUMENTS(VOLK_TEST "" "" "TARGET_DEPS;EXTRA_LIB_DIRS;ENVIRONS;ARGS" ${ARGN}) + + #set the initial environs to use + set(environs ${VOLK_TEST_ENVIRONS}) + + #create the initial library path + file(TO_NATIVE_PATH "${VOLK_TEST_EXTRA_LIB_DIRS}" libpath) + + #set the source directory, which is mostly FYI + file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR} srcdir) + list(APPEND environs "srcdir=\"${srcdir}\"") + + #http://www.cmake.org/pipermail/cmake/2009-May/029464.html + #Replaced this add test + set environs code with the shell script generation. + #Its nicer to be able to manually run the shell script to diagnose problems. + if(UNIX) + if(APPLE) + set(LD_PATH_VAR "DYLD_LIBRARY_PATH") + else() + set(LD_PATH_VAR "LD_LIBRARY_PATH") + endif() + + #create a list of target directories to be determined by the + #"add_test" command, via the $ operator; make sure the + #test's directory is first, since it ($1) is prepended to PATH. + unset(TARGET_DIR_LIST) + foreach(target ${executable_name} ${VOLK_TEST_TARGET_DEPS}) + list(APPEND TARGET_DIR_LIST "\$") + endforeach() + + #augment the PATH to start with the directory of the test + set(binpath "\"$1:\$PATH\"") + list(APPEND environs "PATH=${binpath}") + + #set the shell to use + if(CMAKE_CROSSCOMPILING) + set(SHELL "/bin/sh") + else() + find_program(SHELL sh) + endif() + + #check to see if the shell supports "$*" expansion with IFS + if(NOT TESTED_SHELL_SUPPORTS_IFS) + set(TESTED_SHELL_SUPPORTS_IFS TRUE CACHE BOOL "") + set(sh_file ${CMAKE_CURRENT_BINARY_DIR}/ifs_test.sh) + file(WRITE ${sh_file} "#!${SHELL}\n") + file(APPEND ${sh_file} "export IFS=:\n") + file(APPEND ${sh_file} "echo \"$*\"\n") + #make the shell file executable + execute_process(COMMAND chmod +x ${sh_file}) + + #execute the shell script + execute_process(COMMAND ${sh_file} "a" "b" "c" + OUTPUT_VARIABLE output OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + #check the output to see if it is correct + string(COMPARE EQUAL ${output} "a:b:c" SHELL_SUPPORTS_IFS) + set(SHELL_SUPPORTS_IFS ${SHELL_SUPPORTS_IFS} CACHE BOOL + "Set this value to TRUE if the shell supports IFS argument expansion" + ) + endif() + unset(testlibpath) + if(SHELL_SUPPORTS_IFS) + #"$*" expands in the shell into a list of all of the arguments + #to the shell script, concatenated using the character provided + #in ${IFS}. + list(APPEND testlibpath "$*") + else() + #shell does not support IFS expansion; use a loop instead + list(APPEND testlibpath "\${LL}") + endif() + + #finally: add in the current library path variable + list(INSERT libpath 0 ${testlibpath}) + list(APPEND libpath "$${LD_PATH_VAR}") + + #replace list separator with the path separator + string(REPLACE ";" ":" libpath "${libpath}") + list(APPEND environs "${LD_PATH_VAR}=\"${libpath}\"") + + #generate a shell script file that sets the environment and runs the test + set(sh_file ${CMAKE_CURRENT_BINARY_DIR}/${test_name}_test.sh) + file(WRITE ${sh_file} "#!${SHELL}\n") + if(SHELL_SUPPORTS_IFS) + file(APPEND ${sh_file} "export IFS=:\n") + else() + file(APPEND ${sh_file} "LL=\"$1\" && for tf in \"\$@\"; do LL=\"\${LL}:\${tf}\"; done\n") + endif() + + #each line sets an environment variable + foreach(environ ${environs}) + file(APPEND ${sh_file} "export ${environ}\n") + endforeach(environ) + + set(VOLK_TEST_ARGS "${test_name}") + + #redo the test args to have a space between each + string(REPLACE ";" " " VOLK_TEST_ARGS "${VOLK_TEST_ARGS}") + + #finally: append the test name to execute + file(APPEND ${sh_file} "${CMAKE_CROSSCOMPILING_EMULATOR} ${executable_name} ${VOLK_TEST_ARGS}\n") + + #make the shell file executable + execute_process(COMMAND chmod +x ${sh_file}) + + #add the shell file as the test to execute; + #use the form that allows for $ substitutions, + #then combine the script arguments inside the script. + add_test(NAME qa_${test_name} + COMMAND ${SHELL} ${sh_file} ${TARGET_DIR_LIST} + ) + + endif(UNIX) + + if(WIN32) + #In the land of windows, all libraries must be in the PATH. Since + #the dependent libraries are not yet installed, we must manually + #set them in the PATH to run tests. The following appends the + #path of a target dependency. + # + #create a list of target directories to be determined by the + #"add_test" command, via the $ operator; make sure the + #test's directory is first, since it ($1) is prepended to PATH. + unset(TARGET_DIR_LIST) + foreach(target ${executable_name} ${VOLK_TEST_TARGET_DEPS}) + list(APPEND TARGET_DIR_LIST "$") + endforeach() + #replace list separator with the path separator (escaped) + string(REPLACE ";" "\\\\;" TARGET_DIR_LIST "${TARGET_DIR_LIST}") + + #add command line argument (TARGET_DIR_LIST) to path and append current path + list(INSERT libpath 0 "%1") + list(APPEND libpath "%PATH%") + + #replace list separator with the path separator (escaped) + string(REPLACE ";" "\\;" libpath "${libpath}") + list(APPEND environs "PATH=${libpath}") + + #generate a bat file that sets the environment and runs the test + set(bat_file ${CMAKE_CURRENT_BINARY_DIR}/${test_name}_test.bat) + file(WRITE ${bat_file} "@echo off\n") + + #each line sets an environment variable + foreach(environ ${environs}) + file(APPEND ${bat_file} "SET ${environ}\n") + endforeach(environ) + + set(VOLK_TEST_ARGS "${test_name}") + + #redo the test args to have a space between each + string(REPLACE ";" " " VOLK_TEST_ARGS "${VOLK_TEST_ARGS}") + + #finally: append the test name to execute + file(APPEND ${bat_file} "${executable_name} ${VOLK_TEST_ARGS}\n") + file(APPEND ${bat_file} "\n") + + add_test(NAME qa_${test_name} + COMMAND ${bat_file} ${TARGET_DIR_LIST} + ) + endif(WIN32) + +endfunction(VOLK_ADD_TEST) diff --git a/cmake/Modules/VolkBuildTypes.cmake b/cmake/Modules/VolkBuildTypes.cmake new file mode 100644 index 0000000..7d6da8e --- /dev/null +++ b/cmake/Modules/VolkBuildTypes.cmake @@ -0,0 +1,199 @@ +# Copyright 2014 Free Software Foundation, Inc. +# +# This file is part of VOLK +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +if(DEFINED __INCLUDED_VOLK_BUILD_TYPES_CMAKE) + return() +endif() +set(__INCLUDED_VOLK_BUILD_TYPES_CMAKE TRUE) + +# Standard CMake Build Types and their basic CFLAGS: +# - None: nothing set +# - Debug: -O2 -g +# - Release: -O3 +# - RelWithDebInfo: -O3 -g +# - MinSizeRel: -Os + +# Additional Build Types, defined below: +# - NoOptWithASM: -O0 -g -save-temps +# - O2WithASM: -O2 -g -save-temps +# - O3WithASM: -O3 -g -save-temps +# - DebugParanoid -O0 -g -Werror + +# Defines the list of acceptable cmake build types. When adding a new +# build type below, make sure to add it to this list. +list(APPEND AVAIL_BUILDTYPES + None Debug Release RelWithDebInfo MinSizeRel + DebugParanoid NoOptWithASM O2WithASM O3WithASM + ASAN +) + +######################################################################## +# VOLK_CHECK_BUILD_TYPE(build type) +# +# Use this to check that the build type set in CMAKE_BUILD_TYPE on the +# commandline is one of the valid build types used by this project. It +# checks the value set in the cmake interface against the list of +# known build types in AVAIL_BUILDTYPES. If the build type is found, +# the function exits immediately. If nothing is found by the end of +# checking all available build types, we exit with an error and list +# the available build types. +######################################################################## +function(VOLK_CHECK_BUILD_TYPE settype) + STRING(TOUPPER ${settype} _settype) + foreach(btype ${AVAIL_BUILDTYPES}) + STRING(TOUPPER ${btype} _btype) + if(${_settype} STREQUAL ${_btype}) + return() # found it; exit cleanly + endif(${_settype} STREQUAL ${_btype}) + endforeach(btype) + # Build type not found; error out + message(FATAL_ERROR "Build type '${settype}' not valid, must be one of: ${AVAIL_BUILDTYPES}") +endfunction(VOLK_CHECK_BUILD_TYPE) + +######################################################################## +# For GCC and Clang, we can set a build type: +# +# -DCMAKE_BUILD_TYPE=DebugParanoid +# +# This type uses no optimization (-O0), outputs debug symbols (-g), warns +# on everything, and stops on warnings. +# NOTE: This is not defined on Windows systems. +######################################################################## +if(NOT WIN32) + SET(CMAKE_CXX_FLAGS_DEBUGPARANOID "-Wall -Wextra -g -O0" CACHE STRING + "Flags used by the C++ compiler during DebugParanoid builds." FORCE) + SET(CMAKE_C_FLAGS_DEBUGPARANOID "-Wall -Wextra -g -O0" CACHE STRING + "Flags used by the C compiler during DebugParanoid builds." FORCE) + SET(CMAKE_EXE_LINKER_FLAGS_DEBUGPARANOID + "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING + "Flags used for linking binaries during NoOptWithASM builds." FORCE) + SET(CMAKE_SHARED_LINKER_FLAGS_DEBUGPARANOID + "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING + "Flags used by the shared lib linker during NoOptWithASM builds." FORCE) + + MARK_AS_ADVANCED( + CMAKE_CXX_FLAGS_DEBUGPARANOID + CMAKE_C_FLAGS_DEBUGPARANOID + CMAKE_EXE_LINKER_FLAGS_DEBUGPARANOID + CMAKE_SHARED_LINKER_DEBUGPARANOID) +endif(NOT WIN32) + + +######################################################################## +# For GCC and Clang, we can set a build type: +# +# -DCMAKE_BUILD_TYPE=NoOptWithASM +# +# This type uses no optimization (-O0), outputs debug symbols (-g) and +# outputs all intermediary files the build system produces, including +# all assembly (.s) files. Look in the build directory for these +# files. +# NOTE: This is not defined on Windows systems. +######################################################################## +if(NOT WIN32) + SET(CMAKE_CXX_FLAGS_NOOPTWITHASM "-save-temps -g -O0" CACHE STRING + "Flags used by the C++ compiler during NoOptWithASM builds." FORCE) + SET(CMAKE_C_FLAGS_NOOPTWITHASM "-save-temps -g -O0" CACHE STRING + "Flags used by the C compiler during NoOptWithASM builds." FORCE) + SET(CMAKE_EXE_LINKER_FLAGS_NOOPTWITHASM + "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING + "Flags used for linking binaries during NoOptWithASM builds." FORCE) + SET(CMAKE_SHARED_LINKER_FLAGS_NOOPTWITHASM + "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING + "Flags used by the shared lib linker during NoOptWithASM builds." FORCE) + + MARK_AS_ADVANCED( + CMAKE_CXX_FLAGS_NOOPTWITHASM + CMAKE_C_FLAGS_NOOPTWITHASM + CMAKE_EXE_LINKER_FLAGS_NOOPTWITHASM + CMAKE_SHARED_LINKER_FLAGS_NOOPTWITHASM) +endif(NOT WIN32) + + +######################################################################## +# For GCC and Clang, we can set a build type: +# +# -DCMAKE_BUILD_TYPE=O2WithASM +# +# This type uses level 2 optimization (-O2), outputs debug symbols +# (-g) and outputs all intermediary files the build system produces, +# including all assembly (.s) files. Look in the build directory for +# these files. +# NOTE: This is not defined on Windows systems. +######################################################################## + +if(NOT WIN32) + SET(CMAKE_CXX_FLAGS_O2WITHASM "-save-temps -g -O2" CACHE STRING + "Flags used by the C++ compiler during O2WithASM builds." FORCE) + SET(CMAKE_C_FLAGS_O2WITHASM "-save-temps -g -O2" CACHE STRING + "Flags used by the C compiler during O2WithASM builds." FORCE) + SET(CMAKE_EXE_LINKER_FLAGS_O2WITHASM + "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING + "Flags used for linking binaries during O2WithASM builds." FORCE) + SET(CMAKE_SHARED_LINKER_FLAGS_O2WITHASM + "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING + "Flags used by the shared lib linker during O2WithASM builds." FORCE) + + MARK_AS_ADVANCED( + CMAKE_CXX_FLAGS_O2WITHASM + CMAKE_C_FLAGS_O2WITHASM + CMAKE_EXE_LINKER_FLAGS_O2WITHASM + CMAKE_SHARED_LINKER_FLAGS_O2WITHASM) +endif(NOT WIN32) + + +######################################################################## +# For GCC and Clang, we can set a build type: +# +# -DCMAKE_BUILD_TYPE=O3WithASM +# +# This type uses level 3 optimization (-O3), outputs debug symbols +# (-g) and outputs all intermediary files the build system produces, +# including all assembly (.s) files. Look in the build directory for +# these files. +# NOTE: This is not defined on Windows systems. +######################################################################## + +if(NOT WIN32) + SET(CMAKE_CXX_FLAGS_O3WITHASM "-save-temps -g -O3" CACHE STRING + "Flags used by the C++ compiler during O3WithASM builds." FORCE) + SET(CMAKE_C_FLAGS_O3WITHASM "-save-temps -g -O3" CACHE STRING + "Flags used by the C compiler during O3WithASM builds." FORCE) + SET(CMAKE_EXE_LINKER_FLAGS_O3WITHASM + "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING + "Flags used for linking binaries during O3WithASM builds." FORCE) + SET(CMAKE_SHARED_LINKER_FLAGS_O3WITHASM + "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING + "Flags used by the shared lib linker during O3WithASM builds." FORCE) + + MARK_AS_ADVANCED( + CMAKE_CXX_FLAGS_O3WITHASM + CMAKE_C_FLAGS_O3WITHASM + CMAKE_EXE_LINKER_FLAGS_O3WITHASM + CMAKE_SHARED_LINKER_FLAGS_O3WITHASM) +endif(NOT WIN32) + +######################################################################## +# For GCC and Clang, we can set a build type: +# +# -DCMAKE_BUILD_TYPE=ASAN +# +# This type creates an address sanitized build (-fsanitize=address) +# and defaults to the DebugParanoid linker flags. +# NOTE: This is not defined on Windows systems. +######################################################################## +if(NOT WIN32) + SET(CMAKE_CXX_FLAGS_ASAN "-Wall -Wextra -g -O2 -fsanitize=address -fno-omit-frame-pointer" CACHE STRING + "Flags used by the C++ compiler during Address Sanitized builds." FORCE) + SET(CMAKE_C_FLAGS_ASAN "-Wall -Wextra -g -O2 -fsanitize=address -fno-omit-frame-pointer" CACHE STRING + "Flags used by the C compiler during Address Sanitized builds." FORCE) + MARK_AS_ADVANCED( + CMAKE_CXX_FLAGS_ASAN + CMAKE_C_FLAGS_ASAN + CMAKE_EXE_LINKER_FLAGS_DEBUGPARANOID + CMAKE_SHARED_LINKER_DEBUGPARANOID) +endif(NOT WIN32) diff --git a/cmake/Modules/VolkConfig.cmake.in b/cmake/Modules/VolkConfig.cmake.in new file mode 100644 index 0000000..3ffdf36 --- /dev/null +++ b/cmake/Modules/VolkConfig.cmake.in @@ -0,0 +1,41 @@ +# Copyright 2016, 2018 - 2020 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +get_filename_component(VOLK_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) + +if(NOT TARGET Volk::volk) + include("${VOLK_CMAKE_DIR}/VolkTargets.cmake") +endif() + +# set VOLK_FOUND to be set globally, for whether a compatible Volk was +# found -- could be a correct enough version or any version depending +# on how find_package was called. +if(NOT TARGET Volk::volk) + set(VOLK_FOUND FALSE) +else() + set(VOLK_FOUND TRUE) +endif() + +# cache whether a compatible Volk was found for +# use anywhere in the calling project +set(VOLK_FOUND ${VOLK_FOUND} CACHE BOOL "Whether a compatible Volk was found" FORCE) + +if(VOLK_FOUND) + # use the new target library, regardless of whether new or old style + # we still need to set a variable with the library name so that there + # is a variable to reference in the using-project's cmake scripts! + set(VOLK_LIBRARIES Volk::volk CACHE STRING "Volk Library" FORCE) + + # INTERFACE_INCLUDE_DIRECTORIES should always be set + get_target_property(VOLK_INCLUDE_DIRS Volk::volk INTERFACE_INCLUDE_DIRECTORIES) + set(VOLK_INCLUDE_DIRS ${VOLK_INCLUDE_DIRS} CACHE STRING "Volk Include Directories" FORCE) + + # for backward compatibility with old-CMake non-target project finding + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Volk DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS) + mark_as_advanced(VOLK_LIBRARIES VOLK_INCLUDE_DIRS) +endif(VOLK_FOUND) diff --git a/cmake/Modules/VolkConfigVersion.cmake.in b/cmake/Modules/VolkConfigVersion.cmake.in new file mode 100644 index 0000000..b6e9b51 --- /dev/null +++ b/cmake/Modules/VolkConfigVersion.cmake.in @@ -0,0 +1,21 @@ +# Copyright 2014, 2015, 2018, 2020 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +set(MAJOR_VERSION @VERSION_INFO_MAJOR_VERSION@) +set(MINOR_VERSION @VERSION_INFO_MINOR_VERSION@) +set(MAINT_VERSION @VERSION_INFO_MAINT_VERSION@) + +set(PACKAGE_VERSION ${MAJOR_VERSION}.${MINOR_VERSION}.${MAINT_VERSION}) + +if(${PACKAGE_FIND_VERSION_MAJOR} EQUAL ${MAJOR_VERSION}) + if(${PACKAGE_FIND_VERSION_MINOR} EQUAL ${MINOR_VERSION}) + if(NOT ${PACKAGE_FIND_VERSION_PATCH} GREATER ${MAINT_VERSION}) + set(PACKAGE_VERSION_EXACT 1) # exact match for API version + set(PACKAGE_VERSION_COMPATIBLE 1) # compat for minor/patch version + endif() + endif() +endif() diff --git a/cmake/Modules/VolkPython.cmake b/cmake/Modules/VolkPython.cmake new file mode 100644 index 0000000..5ab8546 --- /dev/null +++ b/cmake/Modules/VolkPython.cmake @@ -0,0 +1,264 @@ +# Copyright 2010-2011,2013 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +if(DEFINED __INCLUDED_VOLK_PYTHON_CMAKE) + return() +endif() +set(__INCLUDED_VOLK_PYTHON_CMAKE TRUE) + +######################################################################## +# Setup the python interpreter: +# This allows the user to specify a specific interpreter, +# or finds the interpreter via the built-in cmake module. +######################################################################## +#this allows the user to override PYTHON_EXECUTABLE + +# `PythonInterp` is deprecated in 3.12, we should use `Python3` instead. +# The `Python3` CMake module is introduced in CMake 3.12, e.g. Ubuntu 18.04 comes with CMake 3.10 though. +# https://cmake.org/cmake/help/latest/module/FindPythonInterp.html +# https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3 + +# FUTURE TODO: With CMake 3.12+ we can simply do: +#if(PYTHON_EXECUTABLE) +# set(Python_EXECUTABLE ${PYTHON_EXECUTABLE}) +#else(PYTHON_EXECUTABLE) +# find_package(Python3 COMPONENTS Interpreter) +#endif(PYTHON_EXECUTABLE) +# +##make the path to the executable appear in the cmake gui +#set(PYTHON_EXECUTABLE ${Python_EXECUTABLE} CACHE FILEPATH "python interpreter") +# END FUTURE TODO: Stick with following version as long as we set CMake 3.8 minimum. + +if(PYTHON_EXECUTABLE) + + set(PYTHONINTERP_FOUND TRUE) + +#otherwise if not set, try to automatically find it +else(PYTHON_EXECUTABLE) + + #use the built-in find script + set(Python_ADDITIONAL_VERSIONS 3.4 3.5 3.6 3.7 3.8) + find_package(PythonInterp 3) + + #and if that fails use the find program routine + if(NOT PYTHONINTERP_FOUND) + find_program(PYTHON_EXECUTABLE NAMES python3 python) + if(PYTHON_EXECUTABLE) + set(PYTHONINTERP_FOUND TRUE) + endif(PYTHON_EXECUTABLE) + endif(NOT PYTHONINTERP_FOUND) + +endif(PYTHON_EXECUTABLE) + +#make the path to the executable appear in the cmake gui +set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE FILEPATH "python interpreter") + + +######################################################################## +# Check for the existence of a python module: +# - desc a string description of the check +# - mod the name of the module to import +# - cmd an additional command to run +# - have the result variable to set +######################################################################## +macro(VOLK_PYTHON_CHECK_MODULE desc mod cmd have) + message(STATUS "") + message(STATUS "Python checking for ${desc}") + execute_process( + COMMAND ${PYTHON_EXECUTABLE} -c " +######################################### +try: import ${mod} +except: + try: ${mod} + except: exit(-1) +try: assert ${cmd} +except: exit(-1) +#########################################" + RESULT_VARIABLE ${have} + ) + if(${have} EQUAL 0) + message(STATUS "Python checking for ${desc} - found") + set(${have} TRUE) + else(${have} EQUAL 0) + message(STATUS "Python checking for ${desc} - not found") + set(${have} FALSE) + endif(${have} EQUAL 0) +endmacro(VOLK_PYTHON_CHECK_MODULE) + +######################################################################## +# Sets the python installation directory VOLK_PYTHON_DIR +# cf. https://github.com/gnuradio/gnuradio/blob/master/cmake/Modules/GrPython.cmake +# From https://github.com/pothosware/SoapySDR/tree/master/python +# https://github.com/pothosware/SoapySDR/blob/master/LICENSE_1_0.txt +######################################################################## +if(NOT DEFINED VOLK_PYTHON_DIR) +execute_process( + COMMAND ${PYTHON_EXECUTABLE} -c "import os +import sysconfig +import site + +install_dir = None +# The next line passes a CMake variable into our script. +prefix = '${CMAKE_INSTALL_PREFIX}' + +# We use `site` to identify if our chosen prefix is a default one. +# https://docs.python.org/3/library/site.html +try: + # https://docs.python.org/3/library/site.html#site.getsitepackages + paths = [p for p in site.getsitepackages() if p.startswith(prefix)] + if len(paths) == 1: install_dir = paths[0] +except AttributeError: pass + +# If we found a default install path, `install_dir` is set. +if not install_dir: + # We use a custom install prefix! + # Determine the correct install path in that prefix on the current platform. + # For Python 3.11+, we could use the 'venv' scheme for all platforms + # https://docs.python.org/3.11/library/sysconfig.html#installation-paths + if os.name == 'nt': + scheme = 'nt' + else: + scheme = 'posix_prefix' + install_dir = sysconfig.get_path('platlib', scheme) + prefix = sysconfig.get_path('data', scheme) + +#strip the prefix to return a relative path +print(os.path.relpath(install_dir, prefix))" + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE VOLK_PYTHON_DIR +) +endif() +file(TO_CMAKE_PATH ${VOLK_PYTHON_DIR} VOLK_PYTHON_DIR) + +######################################################################## +# Create an always-built target with a unique name +# Usage: VOLK_UNIQUE_TARGET( ) +######################################################################## +function(VOLK_UNIQUE_TARGET desc) + file(RELATIVE_PATH reldir ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import re, hashlib +unique = hashlib.sha256(b'${reldir}${ARGN}').hexdigest()[:5] +print(re.sub('\\W', '_', '${desc} ${reldir} ' + unique))" + OUTPUT_VARIABLE _target OUTPUT_STRIP_TRAILING_WHITESPACE) + add_custom_target(${_target} ALL DEPENDS ${ARGN}) +endfunction(VOLK_UNIQUE_TARGET) + +######################################################################## +# Install python sources (also builds and installs byte-compiled python) +######################################################################## +function(VOLK_PYTHON_INSTALL) + include(CMakeParseArgumentsCopy) + CMAKE_PARSE_ARGUMENTS(VOLK_PYTHON_INSTALL "" "DESTINATION;COMPONENT" "FILES;PROGRAMS" ${ARGN}) + + #################################################################### + if(VOLK_PYTHON_INSTALL_FILES) + #################################################################### + install(${ARGN}) #installs regular python files + + #create a list of all generated files + unset(pysrcfiles) + unset(pycfiles) + unset(pyofiles) + foreach(pyfile ${VOLK_PYTHON_INSTALL_FILES}) + get_filename_component(pyfile ${pyfile} ABSOLUTE) + list(APPEND pysrcfiles ${pyfile}) + + #determine if this file is in the source or binary directory + file(RELATIVE_PATH source_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${pyfile}) + string(LENGTH "${source_rel_path}" source_rel_path_len) + file(RELATIVE_PATH binary_rel_path ${CMAKE_CURRENT_BINARY_DIR} ${pyfile}) + string(LENGTH "${binary_rel_path}" binary_rel_path_len) + + #and set the generated path appropriately + if(${source_rel_path_len} GREATER ${binary_rel_path_len}) + set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${binary_rel_path}) + else() + set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${source_rel_path}) + endif() + list(APPEND pycfiles ${pygenfile}c) + list(APPEND pyofiles ${pygenfile}o) + + #ensure generation path exists + get_filename_component(pygen_path ${pygenfile} PATH) + file(MAKE_DIRECTORY ${pygen_path}) + + endforeach(pyfile) + + #the command to generate the pyc files + add_custom_command( + DEPENDS ${pysrcfiles} OUTPUT ${pycfiles} + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pycfiles} + ) + + #the command to generate the pyo files + add_custom_command( + DEPENDS ${pysrcfiles} OUTPUT ${pyofiles} + COMMAND ${PYTHON_EXECUTABLE} -O ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pyofiles} + ) + + #create install rule and add generated files to target list + set(python_install_gen_targets ${pycfiles} ${pyofiles}) + install(FILES ${python_install_gen_targets} + DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION} + COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT} + ) + + + #################################################################### + elseif(VOLK_PYTHON_INSTALL_PROGRAMS) + #################################################################### + file(TO_NATIVE_PATH ${PYTHON_EXECUTABLE} pyexe_native) + + if (CMAKE_CROSSCOMPILING) + set(pyexe_native "/usr/bin/env python") + endif() + + foreach(pyfile ${VOLK_PYTHON_INSTALL_PROGRAMS}) + get_filename_component(pyfile_name ${pyfile} NAME) + get_filename_component(pyfile ${pyfile} ABSOLUTE) + string(REPLACE "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" pyexefile "${pyfile}.exe") + list(APPEND python_install_gen_targets ${pyexefile}) + + get_filename_component(pyexefile_path ${pyexefile} PATH) + file(MAKE_DIRECTORY ${pyexefile_path}) + + add_custom_command( + OUTPUT ${pyexefile} DEPENDS ${pyfile} + COMMAND ${PYTHON_EXECUTABLE} -c + "open('${pyexefile}','w').write(r'\#!${pyexe_native}'+'\\n'+open('${pyfile}').read())" + COMMENT "Shebangin ${pyfile_name}" + VERBATIM + ) + + #on windows, python files need an extension to execute + get_filename_component(pyfile_ext ${pyfile} EXT) + if(WIN32 AND NOT pyfile_ext) + set(pyfile_name "${pyfile_name}.py") + endif() + + install(PROGRAMS ${pyexefile} RENAME ${pyfile_name} + DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION} + COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT} + ) + endforeach(pyfile) + + endif() + + VOLK_UNIQUE_TARGET("pygen" ${python_install_gen_targets}) + +endfunction(VOLK_PYTHON_INSTALL) + +######################################################################## +# Write the python helper script that generates byte code files +######################################################################## +file(WRITE ${CMAKE_BINARY_DIR}/python_compile_helper.py " +import sys, py_compile +files = sys.argv[1:] +srcs, gens = files[:len(files)//2], files[len(files)//2:] +for src, gen in zip(srcs, gens): + py_compile.compile(file=src, cfile=gen, doraise=True) +") diff --git a/cmake/Modules/VolkVersion.cmake b/cmake/Modules/VolkVersion.cmake new file mode 100644 index 0000000..18d6a59 --- /dev/null +++ b/cmake/Modules/VolkVersion.cmake @@ -0,0 +1,73 @@ +# Copyright 2014 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +if(DEFINED __INCLUDED_VOLK_VERSION_CMAKE) + return() +endif() +set(__INCLUDED_VOLK_VERSION_CMAKE TRUE) + +#eventually, replace version.sh and fill in the variables below +set(MAJOR_VERSION ${VERSION_INFO_MAJOR_VERSION}) +set(MINOR_VERSION ${VERSION_INFO_MINOR_VERSION}) +set(MAINT_VERSION ${VERSION_INFO_MAINT_VERSION}) + +######################################################################## +# Extract the version string from git describe. +######################################################################## +find_package(Git) + +if(GIT_FOUND AND EXISTS ${CMAKE_SOURCE_DIR}/.git) + message(STATUS "Extracting version information from git describe...") + execute_process( + COMMAND ${GIT_EXECUTABLE} describe --always --abbrev=8 --long + OUTPUT_VARIABLE GIT_DESCRIBE OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + ) +else() + if(NOT VOLK_GIT_COUNT) + set(VOLK_GIT_COUNT "0") + endif() + + if(NOT VOLK_GIT_HASH) + set(VOLK_GIT_HASH "unknown") + endif() + + set(GIT_DESCRIBE "v${MAJOR_VERSION}.${MINOR_VERSION}-${VOLK_GIT_COUNT}-${VOLK_GIT_HASH}") +endif() + +######################################################################## +# Use the logic below to set the version constants +######################################################################## +if("${MINOR_VERSION}" STREQUAL "git") + # VERSION: 1.0git-xxx-gxxxxxxxx + # DOCVER: 1.0git + # SOVERSION: 1.0git + set(VERSION "${GIT_DESCRIBE}") + set(DOCVER "${MAJOR_VERSION}.0${MINOR_VERSION}") + set(SOVERSION "${MAJOR_VERSION}.0${MINOR_VERSION}") + set(RC_MINOR_VERSION "0") + set(RC_MAINT_VERSION "0") +elseif("${MAINT_VERSION}" STREQUAL "git") + # VERSION: 1.xgit-xxx-gxxxxxxxx + # DOCVER: 1.xgit + # SOVERSION: 1.xgit + set(VERSION "${GIT_DESCRIBE}") + set(DOCVER "${MAJOR_VERSION}.${MINOR_VERSION}${MAINT_VERSION}") + set(SOVERSION "${MAJOR_VERSION}.${MINOR_VERSION}${MAINT_VERSION}") + math(EXPR RC_MINOR_VERSION "${MINOR_VERSION} - 1") + set(RC_MAINT_VERSION "0") +else() + # This is a numbered release. + # VERSION: 1.1{.x} + # DOCVER: 1.1{.x} + # SOVERSION: 1.1.0 + set(VERSION "${MAJOR_VERSION}.${MINOR_VERSION}.${MAINT_VERSION}") + set(DOCVER "${VERSION}") + set(SOVERSION "${MAJOR_VERSION}.${MINOR_VERSION}") + set(RC_MINOR_VERSION ${MINOR_VERSION}) + set(RC_MAINT_VERSION ${MAINT_VERSION}) +endif() diff --git a/cmake/Toolchains/aarch64-linux-gnu.cmake b/cmake/Toolchains/aarch64-linux-gnu.cmake new file mode 100644 index 0000000..342d542 --- /dev/null +++ b/cmake/Toolchains/aarch64-linux-gnu.cmake @@ -0,0 +1,33 @@ +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR aarch64) + +if(MINGW OR CYGWIN OR WIN32) + set(UTIL_SEARCH_CMD where) +elseif(UNIX OR APPLE) + set(UTIL_SEARCH_CMD which) +endif() + +set(TOOLCHAIN_PREFIX aarch64-linux-gnu-) + +execute_process( + COMMAND ${UTIL_SEARCH_CMD} ${TOOLCHAIN_PREFIX}gcc + OUTPUT_VARIABLE BINUTILS_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + +get_filename_component(ARM_TOOLCHAIN_DIR ${BINUTILS_PATH} DIRECTORY) + +# The following is not needed on debian +# Without that flag CMake is not able to pass test compilation check +#set(CMAKE_EXE_LINKER_FLAGS_INIT "--specs=nosys.specs") + +set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}gcc) +set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}g++) + +set(CMAKE_OBJCOPY ${ARM_TOOLCHAIN_DIR}/${TOOLCHAIN_PREFIX}objcopy CACHE INTERNAL "objcopy tool") +set(CMAKE_SIZE_UTIL ${ARM_TOOLCHAIN_DIR}/${TOOLCHAIN_PREFIX}size CACHE INTERNAL "size tool") + +set(CMAKE_FIND_ROOT_PATH ${BINUTILS_PATH}) + +set(CMAKE_CROSSCOMPILING_EMULATOR "qemu-aarch64 -L /usr/aarch64-linux-gnu/") diff --git a/cmake/Toolchains/arm-linux-gnueabihf.cmake b/cmake/Toolchains/arm-linux-gnueabihf.cmake new file mode 100644 index 0000000..c34eb00 --- /dev/null +++ b/cmake/Toolchains/arm-linux-gnueabihf.cmake @@ -0,0 +1,36 @@ +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR arm) + +if(MINGW OR CYGWIN OR WIN32) + set(UTIL_SEARCH_CMD where) +elseif(UNIX OR APPLE) + set(UTIL_SEARCH_CMD which) +endif() + +set(TOOLCHAIN_PREFIX arm-linux-gnueabihf-) + +execute_process( + COMMAND ${UTIL_SEARCH_CMD} ${TOOLCHAIN_PREFIX}gcc + OUTPUT_VARIABLE BINUTILS_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + +get_filename_component(ARM_TOOLCHAIN_DIR ${BINUTILS_PATH} DIRECTORY) + +# The following is not needed on debian +# Without that flag CMake is not able to pass test compilation check +#set(CMAKE_EXE_LINKER_FLAGS_INIT "--specs=nosys.specs") + +set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}gcc) +set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}g++) +## the following is needed for CheckCSourceCompiles used in lib/CMakeLists.txt +set(CMAKE_C_FLAGS "-mfpu=neon" CACHE STRING "" FORCE) +set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "" FORCE) + +set(CMAKE_OBJCOPY ${ARM_TOOLCHAIN_DIR}/${TOOLCHAIN_PREFIX}objcopy CACHE INTERNAL "objcopy tool") +set(CMAKE_SIZE_UTIL ${ARM_TOOLCHAIN_DIR}/${TOOLCHAIN_PREFIX}size CACHE INTERNAL "size tool") + +set(CMAKE_FIND_ROOT_PATH ${BINUTILS_PATH}) + +set(CMAKE_CROSSCOMPILING_EMULATOR "qemu-arm -L /usr/arm-linux-gnueabihf/") diff --git a/cmake/Toolchains/arm_cortex_a15_hardfp_native.cmake b/cmake/Toolchains/arm_cortex_a15_hardfp_native.cmake new file mode 100644 index 0000000..23b7d08 --- /dev/null +++ b/cmake/Toolchains/arm_cortex_a15_hardfp_native.cmake @@ -0,0 +1,9 @@ +######################################################################## +# Toolchain file for building native on a ARM Cortex A8 w/ NEON +# Usage: cmake -DCMAKE_TOOLCHAIN_FILE= +######################################################################## +set(CMAKE_CXX_COMPILER g++) +set(CMAKE_C_COMPILER gcc) +set(CMAKE_CXX_FLAGS "-march=armv7-a -mtune=cortex-a15 -mfpu=neon -mfloat-abi=hard" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "" FORCE) #same flags for C sources +set(CMAKE_ASM_FLAGS "${CMAKE_CXX_FLAGS} -g" CACHE STRING "" FORCE) #same flags for asm sources \ No newline at end of file diff --git a/cmake/Toolchains/arm_cortex_a53_hardfp_native.cmake b/cmake/Toolchains/arm_cortex_a53_hardfp_native.cmake new file mode 100644 index 0000000..2f97be9 --- /dev/null +++ b/cmake/Toolchains/arm_cortex_a53_hardfp_native.cmake @@ -0,0 +1,9 @@ +######################################################################## +# Toolchain file for building native on a ARM Cortex A53 w/ NEON +# Usage: cmake -DCMAKE_TOOLCHAIN_FILE= +######################################################################## +set(CMAKE_CXX_COMPILER g++) +set(CMAKE_C_COMPILER gcc) +set(CMAKE_CXX_FLAGS "-march=armv8-a -mtune=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "" FORCE) #same flags for C sources +set(CMAKE_ASM_FLAGS "${CMAKE_CXX_FLAGS} -mthumb -g" CACHE STRING "" FORCE) #same flags for asm sources diff --git a/cmake/Toolchains/arm_cortex_a72_hardfp_native.cmake b/cmake/Toolchains/arm_cortex_a72_hardfp_native.cmake new file mode 100644 index 0000000..085f5dc --- /dev/null +++ b/cmake/Toolchains/arm_cortex_a72_hardfp_native.cmake @@ -0,0 +1,9 @@ +######################################################################## +# Toolchain file for building native on a ARM Cortex A72 w/ NEON +# Usage: cmake -DCMAKE_TOOLCHAIN_FILE= +######################################################################## +set(CMAKE_CXX_COMPILER g++) +set(CMAKE_C_COMPILER gcc) +set(CMAKE_CXX_FLAGS "-march=armv8-a -mtune=cortex-a72 -mfpu=neon-fp-armv8 -mfloat-abi=hard" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "" FORCE) #same flags for C sources +set(CMAKE_ASM_FLAGS "${CMAKE_CXX_FLAGS} -mthumb -g" CACHE STRING "" FORCE) #same flags for asm sources diff --git a/cmake/Toolchains/arm_cortex_a8_hardfp_native.cmake b/cmake/Toolchains/arm_cortex_a8_hardfp_native.cmake new file mode 100644 index 0000000..4a5e82d --- /dev/null +++ b/cmake/Toolchains/arm_cortex_a8_hardfp_native.cmake @@ -0,0 +1,9 @@ +######################################################################## +# Toolchain file for building native on a ARM Cortex A8 w/ NEON +# Usage: cmake -DCMAKE_TOOLCHAIN_FILE= +######################################################################## +set(CMAKE_CXX_COMPILER g++) +set(CMAKE_C_COMPILER gcc) +set(CMAKE_CXX_FLAGS "-march=armv7-a -mtune=cortex-a8 -mfpu=neon -mfloat-abi=hard" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "" FORCE) #same flags for C sources +set(CMAKE_ASM_FLAGS "${CMAKE_CXX_FLAGS} -g" CACHE STRING "" FORCE) #same flags for asm sources \ No newline at end of file diff --git a/cmake/Toolchains/arm_cortex_a8_softfp_native.cmake b/cmake/Toolchains/arm_cortex_a8_softfp_native.cmake new file mode 100644 index 0000000..8305975 --- /dev/null +++ b/cmake/Toolchains/arm_cortex_a8_softfp_native.cmake @@ -0,0 +1,9 @@ +######################################################################## +# Toolchain file for building native on a ARM Cortex A8 w/ NEON +# Usage: cmake -DCMAKE_TOOLCHAIN_FILE= +######################################################################## +set(CMAKE_CXX_COMPILER g++) +set(CMAKE_C_COMPILER gcc) +set(CMAKE_CXX_FLAGS "-march=armv7-a -mtune=cortex-a8 -mfpu=neon -mfloat-abi=softfp" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "" FORCE) #same flags for C sources +set(CMAKE_ASM_FLAGS "${CMAKE_CXX_FLAGS} -g" CACHE STRING "" FORCE) #same flags for asm sources \ No newline at end of file diff --git a/cmake/Toolchains/arm_cortex_a9_hardfp_native.cmake b/cmake/Toolchains/arm_cortex_a9_hardfp_native.cmake new file mode 100644 index 0000000..d3423fb --- /dev/null +++ b/cmake/Toolchains/arm_cortex_a9_hardfp_native.cmake @@ -0,0 +1,9 @@ +######################################################################## +# Toolchain file for building native on a ARM Cortex A8 w/ NEON +# Usage: cmake -DCMAKE_TOOLCHAIN_FILE= +######################################################################## +set(CMAKE_CXX_COMPILER g++) +set(CMAKE_C_COMPILER gcc) +set(CMAKE_CXX_FLAGS "-march=armv7-a -mtune=cortex-a9 -mfpu=neon -mfloat-abi=hard" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "" FORCE) #same flags for C sources +set(CMAKE_ASM_FLAGS "${CMAKE_CXX_FLAGS} -g" CACHE STRING "" FORCE) #same flags for asm sources \ No newline at end of file diff --git a/cmake/Toolchains/intel-sde.cmake b/cmake/Toolchains/intel-sde.cmake new file mode 100644 index 0000000..bdda0ce --- /dev/null +++ b/cmake/Toolchains/intel-sde.cmake @@ -0,0 +1,3 @@ +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=knl") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=knl") +set(CMAKE_CROSSCOMPILING_EMULATOR "$ENV{TRAVIS_BUILD_DIR}/cache/$ENV{SDE_VERSION}/sde64 -knl --") diff --git a/cmake/Toolchains/oe-sdk_cross.cmake b/cmake/Toolchains/oe-sdk_cross.cmake new file mode 100644 index 0000000..788a22b --- /dev/null +++ b/cmake/Toolchains/oe-sdk_cross.cmake @@ -0,0 +1,15 @@ +set( CMAKE_SYSTEM_NAME Linux ) +#set( CMAKE_C_COMPILER $ENV{CC} ) +#set( CMAKE_CXX_COMPILER $ENV{CXX} ) +string(REGEX MATCH "sysroots/([a-zA-Z0-9]+)" CMAKE_SYSTEM_PROCESSOR $ENV{SDKTARGETSYSROOT}) +string(REGEX REPLACE "sysroots/" "" CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}) +set( CMAKE_CXX_FLAGS $ENV{CXXFLAGS} CACHE STRING "" FORCE ) +set( CMAKE_C_FLAGS $ENV{CFLAGS} CACHE STRING "" FORCE ) #same flags for C sources +set( CMAKE_LDFLAGS_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "" FORCE ) #same flags for C sources +set( CMAKE_LIBRARY_PATH ${OECORE_TARGET_SYSROOT}/usr/lib ) +set( CMAKE_FIND_ROOT_PATH $ENV{OECORE_TARGET_SYSROOT} $ENV{OECORE_NATIVE_SYSROOT} ) +set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER ) +set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY ) +set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY ) +set ( ORC_INCLUDE_DIRS $ENV{OECORE_TARGET_SYSROOT}/usr/include/orc-0.4 ) +set ( ORC_LIBRARY_DIRS $ENV{OECORE_TARGET_SYSROOT}/usr/lib ) diff --git a/cmake/cmake_uninstall.cmake.in b/cmake/cmake_uninstall.cmake.in new file mode 100644 index 0000000..d9d13ea --- /dev/null +++ b/cmake/cmake_uninstall.cmake.in @@ -0,0 +1,39 @@ +# Copyright 2014 Free Software Foundation, Inc. +# +# This file is part of VOLK. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +# http://www.vtk.org/Wiki/CMake_FAQ#Can_I_do_.22make_uninstall.22_with_CMake.3F + +IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") + MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"") +ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") + +FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) +STRING(REGEX REPLACE "\n" ";" files "${files}") +FOREACH(file ${files}) + MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"") + IF(EXISTS "$ENV{DESTDIR}${file}") + EXEC_PROGRAM( + "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" + OUTPUT_VARIABLE rm_out + RETURN_VALUE rm_retval + ) + IF(NOT "${rm_retval}" STREQUAL 0) + MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"") + ENDIF(NOT "${rm_retval}" STREQUAL 0) + ELSEIF(IS_SYMLINK "$ENV{DESTDIR}${file}") + EXEC_PROGRAM( + "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" + OUTPUT_VARIABLE rm_out + RETURN_VALUE rm_retval + ) + IF(NOT "${rm_retval}" STREQUAL 0) + MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"") + ENDIF(NOT "${rm_retval}" STREQUAL 0) + ELSE(EXISTS "$ENV{DESTDIR}${file}") + MESSAGE(STATUS "File \"$ENV{DESTDIR}${file}\" does not exist.") + ENDIF(EXISTS "$ENV{DESTDIR}${file}") +ENDFOREACH(file) diff --git a/cmake/msvc/config.h b/cmake/msvc/config.h new file mode 100644 index 0000000..68f716e --- /dev/null +++ b/cmake/msvc/config.h @@ -0,0 +1,58 @@ +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_CONFIG_H_ // [ +#define _MSC_CONFIG_H_ + +//////////////////////////////////////////////////////////////////////// +// enable inline functions for C code +//////////////////////////////////////////////////////////////////////// +#ifndef __cplusplus +#define inline __inline +#endif + +//////////////////////////////////////////////////////////////////////// +// signed size_t +//////////////////////////////////////////////////////////////////////// +#include +typedef ptrdiff_t ssize_t; + +//////////////////////////////////////////////////////////////////////// +// rint functions +//////////////////////////////////////////////////////////////////////// +#if _MSC_VER < 1800 +#include +static inline long lrint(double x) { return (long)(x > 0.0 ? x + 0.5 : x - 0.5); } +static inline long lrintf(float x) { return (long)(x > 0.0f ? x + 0.5f : x - 0.5f); } +static inline long long llrint(double x) +{ + return (long long)(x > 0.0 ? x + 0.5 : x - 0.5); +} +static inline long long llrintf(float x) +{ + return (long long)(x > 0.0f ? x + 0.5f : x - 0.5f); +} +static inline double rint(double x) { return (x > 0.0) ? floor(x + 0.5) : ceil(x - 0.5); } +static inline float rintf(float x) +{ + return (x > 0.0f) ? floorf(x + 0.5f) : ceilf(x - 0.5f); +} +#endif + +//////////////////////////////////////////////////////////////////////// +// math constants +//////////////////////////////////////////////////////////////////////// +#if _MSC_VER < 1800 +#include +#define INFINITY HUGE_VAL +#endif + +//////////////////////////////////////////////////////////////////////// +// random and srandom +//////////////////////////////////////////////////////////////////////// +#include +static inline long int random(void) { return rand(); } +static inline void srandom(unsigned int seed) { srand(seed); } + +#endif // _MSC_CONFIG_H_ ] diff --git a/cmake/msvc/sys/time.h b/cmake/msvc/sys/time.h new file mode 100644 index 0000000..4bda1ba --- /dev/null +++ b/cmake/msvc/sys/time.h @@ -0,0 +1,71 @@ +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_SYS_TIME_H_ +#define _MSC_SYS_TIME_H_ + +// prevent windows.h from clobbering min and max functions with macros +#ifndef NOMINMAX +#define NOMINMAX +#endif + +// http://social.msdn.microsoft.com/Forums/en/vcgeneral/thread/430449b3-f6dd-4e18-84de-eebd26a8d668 +#include < time.h > +#include //I've omitted this line. +#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) +#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 +#else +#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL +#endif + +#if _MSC_VER < 1900 +struct timespec { + + time_t tv_sec; /* Seconds since 00:00:00 GMT, */ + + /* 1 January 1970 */ + + long tv_nsec; /* Additional nanoseconds since */ + + /* tv_sec */ +}; +#endif + +struct timezone { + int tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +static inline int gettimeofday(struct timeval* tv, struct timezone* tz) +{ + FILETIME ft; + unsigned __int64 tmpres = 0; + static int tzflag; + + if (NULL != tv) { + GetSystemTimeAsFileTime(&ft); + + tmpres |= ft.dwHighDateTime; + tmpres <<= 32; + tmpres |= ft.dwLowDateTime; + + /*converting file time to unix epoch*/ + tmpres -= DELTA_EPOCH_IN_MICROSECS; + tv->tv_sec = (long)(tmpres / 1000000UL); + tv->tv_usec = (long)(tmpres % 1000000UL); + } + + if (NULL != tz) { + if (!tzflag) { + _tzset(); + tzflag++; + } + tz->tz_minuteswest = _timezone / 60; + tz->tz_dsttime = _daylight; + } + + return 0; +} + +#endif //_MSC_SYS_TIME_H_ diff --git a/cpu_features/.clang-format b/cpu_features/.clang-format new file mode 100644 index 0000000..06ea346 --- /dev/null +++ b/cpu_features/.clang-format @@ -0,0 +1,4 @@ +--- +Language: Cpp +BasedOnStyle: Google +... diff --git a/cpu_features/.dockerignore b/cpu_features/.dockerignore new file mode 100644 index 0000000..716b782 --- /dev/null +++ b/cpu_features/.dockerignore @@ -0,0 +1,31 @@ +# Project Files unneeded by docker +ci/Makefile +ci/docker +ci/doc +ci/cache +.git +.gitignore +.github +.dockerignore +.clang-format +appveyor.yml +.travis.yml +AUTHORS +CONTRIBUTING.md +CONTRIBUTORS +LICENSE +README.md + +build/ +cmake_build/ +build_cross/ +cmake-build-*/ +out/ + +# Editor directories and files +.idea/ +.vagrant/ +.vscode/ +.vs/ +*.user +*.swp diff --git a/cpu_features/.github/workflows/Dockerfile b/cpu_features/.github/workflows/Dockerfile new file mode 100644 index 0000000..8d6f3dc --- /dev/null +++ b/cpu_features/.github/workflows/Dockerfile @@ -0,0 +1,5 @@ +# Create a virtual environment with all tools installed +# ref: https://hub.docker.com/_/alpine +FROM alpine:edge +# Install system build dependencies +RUN apk add --no-cache git clang-extra-tools diff --git a/cpu_features/.github/workflows/aarch64_linux.yml b/cpu_features/.github/workflows/aarch64_linux.yml new file mode 100644 index 0000000..2de7289 --- /dev/null +++ b/cpu_features/.github/workflows/aarch64_linux.yml @@ -0,0 +1,28 @@ +name: aarch64 Linux + +on: + push: + pull_request: + schedule: + # min hours day(month) month day(week) + - cron: '0 0 7,22 * *' + +jobs: + # Building using the github runner environement directly. + aarch64: + runs-on: ubuntu-latest + strategy: + matrix: + targets: [ + [aarch64-linux-gnu], + [aarch64_be-linux-gnu] + ] + fail-fast: false + env: + TARGET: ${{ matrix.targets[0] }} + steps: + - uses: actions/checkout@v2 + - name: Build + run: make --directory=ci ${TARGET}_build + - name: Test + run: make --directory=ci ${TARGET}_test diff --git a/cpu_features/.github/workflows/amd64_freebsd.yml b/cpu_features/.github/workflows/amd64_freebsd.yml new file mode 100644 index 0000000..eeab380 --- /dev/null +++ b/cpu_features/.github/workflows/amd64_freebsd.yml @@ -0,0 +1,22 @@ +name: amd64 FreeBSD + +on: + push: + pull_request: + schedule: + # min hours day(month) month day(week) + - cron: '0 0 7,22 * *' + +jobs: + # Only MacOS hosted runner provides virtualisation with vagrant/virtualbox installed. + # see: https://github.com/actions/virtual-environments/tree/main/images/macos + freebsd: + runs-on: macos-10.15 + steps: + - uses: actions/checkout@v2 + - name: vagrant version + run: Vagrant --version + - name: VirtualBox version + run: virtualbox -h + - name: Build + run: cd ci/vagrant/freebsd && vagrant up diff --git a/cpu_features/.github/workflows/amd64_linux.yml b/cpu_features/.github/workflows/amd64_linux.yml new file mode 100644 index 0000000..21f4f90 --- /dev/null +++ b/cpu_features/.github/workflows/amd64_linux.yml @@ -0,0 +1,31 @@ +name: amd64 Linux + +on: + push: + pull_request: + schedule: + # min hours day(month) month day(week) + - cron: '0 0 7,22 * *' + +jobs: + # Building using the github runner environement directly. + make: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Env + run: make --directory=ci amd64_env + - name: Devel + run: make --directory=ci amd64_devel + - name: Build + run: make --directory=ci amd64_build + - name: Test + run: make --directory=ci amd64_test + - name: Install Env + run: make --directory=ci amd64_install_env + - name: Install Devel + run: make --directory=ci amd64_install_devel + - name: Install Build + run: make --directory=ci amd64_install_build + - name: Install Test + run: make --directory=ci amd64_install_test diff --git a/cpu_features/.github/workflows/amd64_macos.yml b/cpu_features/.github/workflows/amd64_macos.yml new file mode 100644 index 0000000..19ec18c --- /dev/null +++ b/cpu_features/.github/workflows/amd64_macos.yml @@ -0,0 +1,43 @@ +name: amd64 macOS + +on: + push: + pull_request: + schedule: + # min hours day(month) month day(week) + - cron: '0 0 7,22 * *' + +jobs: + # Building using the github runner environement directly. + xcode: + runs-on: macos-latest + env: + CTEST_OUTPUT_ON_FAILURE: 1 + steps: + - uses: actions/checkout@v2 + - name: Check cmake + run: cmake --version + - name: Configure + run: cmake -S. -Bbuild -G "Xcode" -DCMAKE_CONFIGURATION_TYPES=Release + - name: Build + run: cmake --build build --config Release --target ALL_BUILD -v + - name: Test + run: cmake --build build --config Release --target RUN_TESTS -v + - name: Install + run: cmake --build build --config Release --target install -v + make: + runs-on: macos-latest + env: + CTEST_OUTPUT_ON_FAILURE: 1 + steps: + - uses: actions/checkout@v2 + - name: Check cmake + run: cmake --version + - name: Configure + run: cmake -S. -Bbuild -DCMAKE_BUILD_TYPE=Release + - name: Build + run: cmake --build build --target all -v + - name: Test + run: cmake --build build --target test -v + - name: Install + run: cmake --build build --target install -v diff --git a/cpu_features/.github/workflows/amd64_windows.yml b/cpu_features/.github/workflows/amd64_windows.yml new file mode 100644 index 0000000..2631db3 --- /dev/null +++ b/cpu_features/.github/workflows/amd64_windows.yml @@ -0,0 +1,25 @@ +name: amd64 Windows + +on: + push: + pull_request: + schedule: + # min hours day(month) month day(week) + - cron: '0 0 7,22 * *' + +jobs: + # Building using the github runner environement directly. + msvc: + runs-on: windows-latest + env: + CTEST_OUTPUT_ON_FAILURE: 1 + steps: + - uses: actions/checkout@v2 + - name: Configure + run: cmake -S. -Bbuild -G "Visual Studio 16 2019" -DCMAKE_CONFIGURATION_TYPES=Release + - name: Build + run: cmake --build build --config Release --target ALL_BUILD -- /maxcpucount + - name: Test + run: cmake --build build --config Release --target RUN_TESTS -- /maxcpucount + - name: Install + run: cmake --build build --config Release --target INSTALL -- /maxcpucount diff --git a/cpu_features/.github/workflows/arm_linux.yml b/cpu_features/.github/workflows/arm_linux.yml new file mode 100644 index 0000000..2272188 --- /dev/null +++ b/cpu_features/.github/workflows/arm_linux.yml @@ -0,0 +1,31 @@ +name: arm Linux + +on: + push: + pull_request: + schedule: + # min hours day(month) month day(week) + - cron: '0 0 7,22 * *' + +jobs: + # Building using the github runner environement directly. + arm: + runs-on: ubuntu-latest + strategy: + matrix: + targets: [ + [arm-linux-gnueabihf], + [armv8l-linux-gnueabihf], + [arm-linux-gnueabi], + [armeb-linux-gnueabihf], + [armeb-linux-gnueabi] + ] + fail-fast: false + env: + TARGET: ${{ matrix.targets[0] }} + steps: + - uses: actions/checkout@v2 + - name: Build + run: make --directory=ci ${TARGET}_build + - name: Test + run: make --directory=ci ${TARGET}_test diff --git a/cpu_features/.github/workflows/clang_format.yml b/cpu_features/.github/workflows/clang_format.yml new file mode 100644 index 0000000..17d1567 --- /dev/null +++ b/cpu_features/.github/workflows/clang_format.yml @@ -0,0 +1,24 @@ +name: clang-format Check + +on: [push, pull_request] + +jobs: + # Building using the github runner environement directly. + clang-format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Fetch origin/master + run: git fetch origin master + - name: List of changed file(s) + run: git diff --name-only FETCH_HEAD + + - name: Build clang-format docker + run: cd .github/workflows && docker build --tag=linter . + - name: Check clang-format + run: docker run --rm --init -v $(pwd):/repo linter:latest clang-format --version + - name: clang-format help + run: docker run --rm --init -v $(pwd):/repo linter:latest clang-format --help + + - name: Check current commit + run: docker run --rm --init -v $(pwd):/repo -w /repo linter:latest sh -c "git diff --diff-filter=d --name-only FETCH_HEAD | grep '\.c$\|\.h$\|\.cc$' | xargs clang-format --style=file --dry-run --Werror " diff --git a/cpu_features/.github/workflows/mips_linux.yml b/cpu_features/.github/workflows/mips_linux.yml new file mode 100644 index 0000000..571de3a --- /dev/null +++ b/cpu_features/.github/workflows/mips_linux.yml @@ -0,0 +1,30 @@ +name: mips Linux + +on: + push: + pull_request: + schedule: + # min hours day(month) month day(week) + - cron: '0 0 7,22 * *' + +jobs: + # Building using the github runner environement directly. + mips: + runs-on: ubuntu-latest + strategy: + matrix: + targets: [ + [mips32], + [mips32el], + [mips64], + [mips64el] + ] + fail-fast: false + env: + TARGET: ${{ matrix.targets[0] }} + steps: + - uses: actions/checkout@v2 + - name: Build + run: make --directory=ci ${TARGET}_build + - name: Test + run: make --directory=ci ${TARGET}_test diff --git a/cpu_features/.gitignore b/cpu_features/.gitignore new file mode 100644 index 0000000..b68012d --- /dev/null +++ b/cpu_features/.gitignore @@ -0,0 +1,13 @@ +# Build folders +build/ +cmake_build/ +build_cross/ +cmake-build-*/ +out/ + +# IDEs / CI temp files +.idea/ +.vagrant/ +.vscode/ +.vs/ +*.swp diff --git a/cpu_features/CMakeLists.txt b/cpu_features/CMakeLists.txt new file mode 100644 index 0000000..ac3c0a5 --- /dev/null +++ b/cpu_features/CMakeLists.txt @@ -0,0 +1,249 @@ +cmake_minimum_required(VERSION 3.0) + +# option() honors normal variables. +# see: https://cmake.org/cmake/help/git-stage/policy/CMP0077.html +if(POLICY CMP0077) + cmake_policy(SET CMP0077 NEW) +endif() + +project(CpuFeatures VERSION 0.6.0 LANGUAGES C) + +set(CMAKE_C_STANDARD 99) + +# Default Build Type to be Release +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING + "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) +endif(NOT CMAKE_BUILD_TYPE) + +# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make +# it prominent in the GUI. +# cpu_features uses bit-fields which are - to some extends - implementation-defined (see https://en.cppreference.com/w/c/language/bit_field). +# As a consequence it is discouraged to use cpu_features as a shared library because different compilers may interpret the code in different ways. +# Prefer static linking from source whenever possible. +option(BUILD_SHARED_LIBS "Build library as shared." OFF) + +# Force PIC on unix when building shared libs +# see: https://en.wikipedia.org/wiki/Position-independent_code +if(BUILD_SHARED_LIBS AND UNIX) + option(CMAKE_POSITION_INDEPENDENT_CODE "Build with Position Independant Code." ON) +endif() + +include(CheckIncludeFile) +include(CheckSymbolExists) +include(GNUInstallDirs) + +macro(setup_include_and_definitions TARGET_NAME) + target_include_directories(${TARGET_NAME} + PUBLIC $ + PRIVATE $ + ) + target_compile_definitions(${TARGET_NAME} + PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024 + ) +endmacro() + +set(PROCESSOR_IS_MIPS FALSE) +set(PROCESSOR_IS_ARM FALSE) +set(PROCESSOR_IS_AARCH64 FALSE) +set(PROCESSOR_IS_X86 FALSE) +set(PROCESSOR_IS_POWER FALSE) + +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips") + set(PROCESSOR_IS_MIPS TRUE) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)") + set(PROCESSOR_IS_AARCH64 TRUE) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") + set(PROCESSOR_IS_ARM TRUE) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") + set(PROCESSOR_IS_X86 TRUE) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)") + set(PROCESSOR_IS_POWER TRUE) +endif() + +macro(add_cpu_features_headers_and_sources HDRS_LIST_NAME SRCS_LIST_NAME) + list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpu_features_macros.h) + list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpu_features_cache_info.h) + file(GLOB IMPL_SOURCES CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/src/impl_*.c") + list(APPEND ${SRCS_LIST_NAME} ${IMPL_SOURCES}) + if(PROCESSOR_IS_MIPS) + list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_mips.h) + elseif(PROCESSOR_IS_ARM) + list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_arm.h) + elseif(PROCESSOR_IS_AARCH64) + list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_aarch64.h) + elseif(PROCESSOR_IS_X86) + list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_x86.h) + list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/internal/cpuid_x86.h) + elseif(PROCESSOR_IS_POWER) + list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_ppc.h) + else() + message(FATAL_ERROR "Unsupported architectures ${CMAKE_SYSTEM_PROCESSOR}") + endif() +endmacro() + +# +# library : utils +# + +add_library(utils OBJECT + ${PROJECT_SOURCE_DIR}/include/internal/bit_utils.h + ${PROJECT_SOURCE_DIR}/include/internal/filesystem.h + ${PROJECT_SOURCE_DIR}/include/internal/stack_line_reader.h + ${PROJECT_SOURCE_DIR}/include/internal/string_view.h + ${PROJECT_SOURCE_DIR}/src/filesystem.c + ${PROJECT_SOURCE_DIR}/src/stack_line_reader.c + ${PROJECT_SOURCE_DIR}/src/string_view.c +) +setup_include_and_definitions(utils) + +# +# library : unix_based_hardware_detection +# + +if(UNIX) + add_library(unix_based_hardware_detection OBJECT + ${PROJECT_SOURCE_DIR}/include/internal/hwcaps.h + ${PROJECT_SOURCE_DIR}/src/hwcaps.c + ) + setup_include_and_definitions(unix_based_hardware_detection) + check_include_file(dlfcn.h HAVE_DLFCN_H) + if(HAVE_DLFCN_H) + target_compile_definitions(unix_based_hardware_detection PRIVATE HAVE_DLFCN_H) + endif() + check_symbol_exists(getauxval "sys/auxv.h" HAVE_STRONG_GETAUXVAL) + if(HAVE_STRONG_GETAUXVAL) + target_compile_definitions(unix_based_hardware_detection PRIVATE HAVE_STRONG_GETAUXVAL) + endif() +endif() + +# +# library : cpu_features +# +set (CPU_FEATURES_HDRS) +set (CPU_FEATURES_SRCS) +add_cpu_features_headers_and_sources(CPU_FEATURES_HDRS CPU_FEATURES_SRCS) +list(APPEND CPU_FEATURES_SRCS $) +if(NOT PROCESSOR_IS_X86 AND UNIX) + list(APPEND CPU_FEATURES_SRCS $) +endif() +add_library(cpu_features ${CPU_FEATURES_HDRS} ${CPU_FEATURES_SRCS}) +set_target_properties(cpu_features PROPERTIES PUBLIC_HEADER "${CPU_FEATURES_HDRS}") +setup_include_and_definitions(cpu_features) +target_link_libraries(cpu_features PUBLIC ${CMAKE_DL_LIBS}) +target_include_directories(cpu_features + PUBLIC $ +) +if(PROCESSOR_IS_X86) + if(APPLE) + target_compile_definitions(cpu_features PRIVATE HAVE_SYSCTLBYNAME) + endif() +endif() +add_library(CpuFeature::cpu_features ALIAS cpu_features) + +# +# program : list_cpu_features +# + +add_executable(list_cpu_features ${PROJECT_SOURCE_DIR}/src/utils/list_cpu_features.c) +target_link_libraries(list_cpu_features PRIVATE cpu_features) +add_executable(CpuFeature::list_cpu_features ALIAS list_cpu_features) + +# +# ndk_compat +# + +if(ANDROID) +add_subdirectory(ndk_compat) +endif() + +# +# tests +# + +include(CTest) +if(BUILD_TESTING) + # Automatically incorporate googletest into the CMake Project if target not + # found. + enable_language(CXX) + + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CXX_EXTENSIONS OFF) # prefer use of -std11 instead of -gnustd11 + + if(NOT TARGET gtest OR NOT TARGET gmock_main) + # Download and unpack googletest at configure time. + configure_file( + cmake/googletest.CMakeLists.txt.in + googletest-download/CMakeLists.txt + ) + + execute_process( + COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download) + + if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") + endif() + + execute_process( + COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download) + + if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") + endif() + + # Prevent overriding the parent project's compiler/linker settings on + # Windows. + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + + # Add googletest directly to our build. This defines the gtest and + # gtest_main targets. + add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src + ${CMAKE_BINARY_DIR}/googletest-build + EXCLUDE_FROM_ALL) + endif() + + add_subdirectory(test) +endif() + +# +# Install cpu_features and list_cpu_features +# + +include(GNUInstallDirs) +install(TARGETS cpu_features list_cpu_features + EXPORT CpuFeaturesTargets + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cpu_features + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + BUNDLE DESTINATION ${CMAKE_INSTALL_BINDIR} +) +install(EXPORT CpuFeaturesTargets + NAMESPACE CpuFeatures:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures + COMPONENT Devel +) +include(CMakePackageConfigHelpers) +configure_package_config_file(cmake/CpuFeaturesConfig.cmake.in + "${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures" + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO +) +write_basic_package_version_file( + "${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake" + COMPATIBILITY SameMajorVersion +) +install( + FILES + "${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake" + "${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures" + COMPONENT Devel +) diff --git a/cpu_features/CONTRIBUTING.md b/cpu_features/CONTRIBUTING.md new file mode 100644 index 0000000..c980350 --- /dev/null +++ b/cpu_features/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. diff --git a/cpu_features/LICENSE b/cpu_features/LICENSE new file mode 100644 index 0000000..a7043c6 --- /dev/null +++ b/cpu_features/LICENSE @@ -0,0 +1,230 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- +For files in the `ndk_compat` folder: +-------------------------------------------------------------------------------- + +Copyright (C) 2010 The Android Open Source Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/cpu_features/README.md b/cpu_features/README.md new file mode 100644 index 0000000..a2b213f --- /dev/null +++ b/cpu_features/README.md @@ -0,0 +1,228 @@ +# cpu_features +[![Linux Status][linux_svg]][linux_link] +[![Macos Status][macos_svg]][macos_link] +[![Windows Status][windows_svg]][windows_link] + +[linux_svg]: https://github.com/google/cpu_features/actions/workflows/amd64_linux.yml/badge.svg?branch=master +[linux_link]: https://github.com/google/cpu_features/actions/workflows/amd64_linux.yml +[macos_svg]: https://github.com/google/cpu_features/actions/workflows/amd64_macos.yml/badge.svg?branch=master +[macos_link]: https://github.com/google/cpu_features/actions/workflows/amd64_macos.yml +[windows_svg]: https://github.com/google/cpu_features/actions/workflows/amd64_windows.yml/badge.svg?branch=master +[windows_link]: https://github.com/google/cpu_features/actions/workflows/amd64_windows.yml + +A cross-platform C library to retrieve CPU features (such as available +instructions) at runtime. + +## Table of Contents + +- [Design Rationale](#rationale) +- [Code samples](#codesample) +- [Running sample code](#usagesample) +- [What's supported](#support) +- [Android NDK's drop in replacement](#ndk) +- [License](#license) +- [Build with cmake](#cmake) +- [Community Bindings](#bindings) + + +## Design Rationale + +- **Simple to use.** See the snippets below for examples. +- **Extensible.** Easy to add missing features or architectures. +- **Compatible with old compilers** and available on many architectures so it + can be used widely. To ensure that cpu_features works on as many platforms + as possible, we implemented it in a highly portable version of C: C99. +- **Sandbox-compatible.** The library uses a variety of strategies to cope + with sandboxed environments or when `cpuid` is unavailable. This is useful + when running integration tests in hermetic environments. +- **Thread safe, no memory allocation, and raises no exceptions.** + cpu_features is suitable for implementing fundamental libc functions like + `malloc`, `memcpy`, and `memcmp`. +- **Unit tested.** + + +## Code samples + +**Note:** For C++ code, the library functions are defined in the `cpu_features` namespace. + +### Checking features at runtime + +Here's a simple example that executes a codepath if the CPU supports both the +AES and the SSE4.2 instruction sets: + +```c +#include "cpuinfo_x86.h" + +// For C++, add `using namespace cpu_features;` +static const X86Features features = GetX86Info().features; + +void Compute(void) { + if (features.aes && features.sse4_2) { + // Run optimized code. + } else { + // Run standard code. + } +} +``` + +### Caching for faster evaluation of complex checks + +If you wish, you can read all the features at once into a global variable, and +then query for the specific features you care about. Below, we store all the ARM +features and then check whether AES and NEON are supported. + +```c +#include +#include "cpuinfo_arm.h" + +// For C++, add `using namespace cpu_features;` +static const ArmFeatures features = GetArmInfo().features; +static const bool has_aes_and_neon = features.aes && features.neon; + +// use has_aes_and_neon. +``` + +This is a good approach to take if you're checking for combinations of features +when using a compiler that is slow to extract individual bits from bit-packed +structures. + +### Checking compile time flags + +The following code determines whether the compiler was told to use the AVX +instruction set (e.g., `g++ -mavx`) and sets `has_avx` accordingly. + +```c +#include +#include "cpuinfo_x86.h" + +// For C++, add `using namespace cpu_features;` +static const X86Features features = GetX86Info().features; +static const bool has_avx = CPU_FEATURES_COMPILED_X86_AVX || features.avx; + +// use has_avx. +``` + +`CPU_FEATURES_COMPILED_X86_AVX` is set to 1 if the compiler was instructed to +use AVX and 0 otherwise, combining compile time and runtime knowledge. + +### Rejecting poor hardware implementations based on microarchitecture + +On x86, the first incarnation of a feature in a microarchitecture might not be +the most efficient (e.g. AVX on Sandy Bridge). We provide a function to retrieve +the underlying microarchitecture so you can decide whether to use it. + +Below, `has_fast_avx` is set to 1 if the CPU supports the AVX instruction +set—but only if it's not Sandy Bridge. + +```c +#include +#include "cpuinfo_x86.h" + +// For C++, add `using namespace cpu_features;` +static const X86Info info = GetX86Info(); +static const X86Microarchitecture uarch = GetX86Microarchitecture(&info); +static const bool has_fast_avx = info.features.avx && uarch != INTEL_SNB; + +// use has_fast_avx. +``` + +This feature is currently available only for x86 microarchitectures. + + +### Running sample code + +Building `cpu_features` (check [quickstart](#quickstart) below) brings a small executable to test the library. + +```shell + % ./build/list_cpu_features +arch : x86 +brand : Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz +family : 6 (0x06) +model : 45 (0x2D) +stepping : 7 (0x07) +uarch : INTEL_SNB +flags : aes,avx,cx16,smx,sse4_1,sse4_2,ssse3 +``` + +```shell +% ./build/list_cpu_features --json +{"arch":"x86","brand":" Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz","family":6,"model":45,"stepping":7,"uarch":"INTEL_SNB","flags":["aes","avx","cx16","smx","sse4_1","sse4_2","ssse3"]} +``` + + +## What's supported + +| | x86³ | ARM | AArch64 | MIPS⁴ | POWER | +|---------|:----:|:-------:|:-------:|:-------:|:-------:| +| Android | yes² | yes¹ | yes¹ | yes¹ | N/A | +| iOS | N/A | not yet | not yet | N/A | N/A | +| Linux | yes² | yes¹ | yes¹ | yes¹ | yes¹ | +| MacOs | yes² | N/A | not yet | N/A | no | +| Windows | yes² | not yet | not yet | N/A | N/A | +| FreeBSD | yes² | not yet | not yet | not yet | not yet | + +1. **Features revealed from Linux.** We gather data from several sources + depending on availability: + + from glibc's + [getauxval](https://www.gnu.org/software/libc/manual/html_node/Auxiliary-Vector.html) + + by parsing `/proc/self/auxv` + + by parsing `/proc/cpuinfo` +2. **Features revealed from CPU.** features are retrieved by using the `cpuid` + instruction. +3. **Microarchitecture detection.** On x86 some features are not always + implemented efficiently in hardware (e.g. AVX on Sandybridge). Exposing the + microarchitecture allows the client to reject particular microarchitectures. +4. All flavors of Mips are supported, little and big endian as well as 32/64 + bits. + + +## Android NDK's drop in replacement + +[cpu_features](https://github.com/google/cpu_features) is now officially +supporting Android and offers a drop in replacement of for the NDK's [cpu-features.h](https://android.googlesource.com/platform/ndk/+/master/sources/android/cpufeatures/cpu-features.h) +, see [ndk_compat](ndk_compat) folder for details. + + +## License + +The cpu_features library is licensed under the terms of the Apache license. +See [LICENSE](LICENSE) for more information. + + +## Build with CMake + +Please check the [CMake build instructions](cmake/README.md). + + +### Quickstart + + - Run `list_cpu_features` +```sh +cmake -S. -Bbuild -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=Release +cmake --build build --config Release -j +./build/list_cpu_features --json +``` + +_Note_: Use `--target ALL_BUILD` on the second line for `Visual Studio` and `XCode`. + + - run tests +```sh +cmake -S. -Bbuild -DBUILD_TESTING=ON -DCMAKE_BUILD_TYPE=Debug +cmake --build build --config Debug -j +cmake --build build --config Debug --target test +``` + +_Note_: Use `--target RUN_TESTS` on the last line for `Visual Studio` and `--target RUN_TEST` for `XCode`. + + +## Community bindings + +Links provided here are not affiliated with Google but are kindly provided by the OSS Community. + + - .Net + - https://github.com/toor1245/cpu_features.NET + - Python + - https://github.com/Narasimha1997/py_cpu + + +_Send PR to showcase your wrapper here_ diff --git a/cpu_features/ci/Makefile b/cpu_features/ci/Makefile new file mode 100644 index 0000000..47ea30a --- /dev/null +++ b/cpu_features/ci/Makefile @@ -0,0 +1,240 @@ +PROJECT := cpu_features +BRANCH := $(shell git rev-parse --abbrev-ref HEAD) +SHA1 := $(shell git rev-parse --verify HEAD) + +# General commands +.PHONY: help +BOLD=\e[1m +RESET=\e[0m + +help: + @echo -e "${BOLD}SYNOPSIS${RESET}" + @echo -e "\tmake [NOCACHE=1]" + @echo + @echo -e "${BOLD}DESCRIPTION${RESET}" + @echo -e "\ttest build inside docker container to have a reproductible build." + @echo + @echo -e "${BOLD}MAKE TARGETS${RESET}" + @echo -e "\t${BOLD}help${RESET}: display this help and exit." + @echo + @echo -e "\t${BOLD}amd64_${RESET}: build docker image using an Ubuntu:latest x86_64 base image." + @echo -e "\t${BOLD}save_amd64_${RESET}: Save the docker image." + @echo -e "\t${BOLD}sh_amd64_${RESET}: run a container using the docker image (debug purpose)." + @echo -e "\t${BOLD}clean_amd64_${RESET}: Remove cache and docker image." + @echo + @echo -e "\tWith ${BOLD}${RESET}:" + @echo -e "\t\t${BOLD}env${RESET}" + @echo -e "\t\t${BOLD}devel${RESET}" + @echo -e "\t\t${BOLD}build${RESET}" + @echo -e "\t\t${BOLD}test${RESET}" + @echo -e "\t\t${BOLD}install_env${RESET}" + @echo -e "\t\t${BOLD}install_devel${RESET}" + @echo -e "\t\t${BOLD}install_build${RESET}" + @echo -e "\t\t${BOLD}install_test${RESET}" + @echo -e "\te.g. 'make amd64_build'" + @echo + @echo -e "\t${BOLD}_${RESET}: build docker image for a specific toolchain target." + @echo -e "\t${BOLD}save__${RESET}: Save the docker image for a specific platform." + @echo -e "\t${BOLD}sh__${RESET}: run a container using the docker image specified (debug purpose)." + @echo -e "\t${BOLD}clean__${RESET}: Remove cache and docker image." + @echo + @echo -e "\tWith ${BOLD}${RESET}:" + @echo -e "\t\t${BOLD}arm-linux-gnueabihf${RESET} (linaro toolchain)" + @echo -e "\t\t${BOLD}armv8l-linux-gnueabihf${RESET} (linaro toolchain)" + @echo -e "\t\t${BOLD}arm-linux-gnueabi${RESET} (linaro toolchain)" + @echo -e "\t\t${BOLD}armeb-linux-gnueabihf${RESET} (linaro toolchain)" + @echo -e "\t\t${BOLD}armeb-linux-gnueabi${RESET} (linaro toolchain)" + @echo -e "\t\t${BOLD}aarch64-linux-gnu${RESET} (linaro toolchain)" + @echo -e "\t\t${BOLD}aarch64_be-linux-gnu${RESET} (linaro toolchain)" + @echo -e "\t\t${BOLD}mips32${RESET} (codespace toolchain)" + @echo -e "\t\t${BOLD}mips64${RESET} (codespace toolchain)" + @echo -e "\t\t${BOLD}mips32el${RESET} (codespace toolchain)" + @echo -e "\t\t${BOLD}mips64el${RESET} (codespace toolchain)" + @echo + @echo -e "\tWith ${BOLD}${RESET}:" + @echo -e "\t\t${BOLD}env${RESET}" + @echo -e "\t\t${BOLD}devel${RESET}" + @echo -e "\t\t${BOLD}build${RESET}" + @echo -e "\t\t${BOLD}test${RESET}" + @echo -e "\te.g. 'make aarch64_test'" + @echo + @echo -e "\t${BOLD}${RESET}: build the vagrant virtual machine." + @echo -e "\t${BOLD}clean_${RESET}: Remove virtual machine for the specified vm." + @echo + @echo -e "\t${BOLD}${RESET}:" + @echo -e "\t\t${BOLD}freebsd${RESET} (FreeBSD)" + @echo + @echo -e "\t${BOLD}clean${RESET}: Remove cache and ALL docker images." + @echo + @echo -e "\t${BOLD}NOCACHE=1${RESET}: use 'docker build --no-cache' when building container (default use cache)." + @echo + @echo -e "branch: $(BRANCH)" + @echo -e "sha1: $(SHA1)" + +# Need to add cmd_platform to PHONY otherwise target are ignored since they do not +# contain recipe (using FORCE do not work here) +.PHONY: all +all: build + +# Delete all implicit rules to speed up makefile +MAKEFLAGS += --no-builtin-rules +.SUFFIXES: +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = +# Keep all intermediate files +# ToDo: try to remove it later +.SECONDARY: + +# Docker image name prefix. +IMAGE := ${PROJECT} + +ifdef NOCACHE +DOCKER_BUILD_CMD := docker build --no-cache +else +DOCKER_BUILD_CMD := docker build +endif + +DOCKER_RUN_CMD := docker run --rm --init --net=host + +# $* stem +# $< first prerequist +# $@ target name + +############ +## NATIVE ## +############ +STAGES = env devel build test install_env install_devel install_build install_test + +targets_amd64 = $(addprefix amd64_, $(STAGES)) +.PHONY: $(targets_amd64) +$(targets_amd64): amd64_%: docker/amd64/Dockerfile + #@docker image rm -f ${IMAGE}:amd64_$* 2>/dev/null + ${DOCKER_BUILD_CMD} \ + --tag ${IMAGE}:amd64_$* \ + --target=$* \ + -f $< \ + .. + +#$(info Create targets: save_amd64 $(addprefix save_amd64_, $(STAGES)) (debug).) +save_targets_amd64 = $(addprefix save_amd64_, $(STAGES)) +.PHONY: $(save_targets_amd64) +$(save_targets_amd64): save_amd64_%: cache/amd64/docker_%.tar +cache/amd64/docker_%.tar: amd64_% + @rm -f $@ + mkdir -p cache/amd64 + docker save ${IMAGE}:amd64_$* -o $@ + +#$(info Create targets: $(addprefix sh_amd64_, $(STAGES)) (debug).) +sh_targets_amd64 = $(addprefix sh_amd64_, $(STAGES)) +.PHONY: $(sh_targets_amd64) +$(sh_targets_amd64): sh_amd64_%: amd64_% + ${DOCKER_RUN_CMD} -it --name ${IMAGE}_amd64_$* ${IMAGE}:amd64_$* + +#$(info Create targets: $(addprefix clean_amd64_, $(STAGES)).) +clean_targets_amd64 = $(addprefix clean_amd64_, $(STAGES)) +.PHONY: clean_amd64 $(clean_targets_amd64) +clean_amd64: $(clean_targets_amd64) +$(clean_targets_amd64): clean_amd64_%: + docker image rm -f ${IMAGE}:amd64_$* 2>/dev/null + rm -f cache/amd64/docker_$*.tar + + +############### +## TOOLCHAIN ## +############### +TOOLCHAIN_TARGETS = \ + arm-linux-gnueabihf armv8l-linux-gnueabihf arm-linux-gnueabi armeb-linux-gnueabihf armeb-linux-gnueabi \ + aarch64-linux-gnu aarch64_be-linux-gnu \ + mips32 mips32el mips64 mips64el +TOOLCHAIN_STAGES = env devel build test +define toolchain-stage-target = +#$$(info STAGE: $1) +#$$(info Create targets: toolchain_$1 $(addsuffix _$1, $(TOOLCHAIN_TARGETS)).) +targets_toolchain_$1 = $(addsuffix _$1, $(TOOLCHAIN_TARGETS)) +.PHONY: toolchain_$1 $$(targets_toolchain_$1) +toolchain_$1: $$(targets_toolchain_$1) +$$(targets_toolchain_$1): %_$1: docker/toolchain/Dockerfile + #@docker image rm -f ${IMAGE}:$$*_$1 2>/dev/null + ${DOCKER_BUILD_CMD} \ + --tag ${IMAGE}:$$*_$1 \ + --build-arg TARGET=$$* \ + --target=$1 \ + -f $$< \ + .. + +#$$(info Create targets: save_toolchain_$1 $(addprefix save_, $(addsuffix _$1, $(TOOLCHAIN_TARGETS))) (debug).) +save_targets_toolchain_$1 = $(addprefix save_, $(addsuffix _$1, $(TOOLCHAIN_TARGETS))) +.PHONY: save_toolchain_$1 $$(save_targets_toolchain_$1) +save_toolchain_$1: $$(save_targets_toolchain_$1) +$$(save_targets_toolchain_$1): save_%_$1: cache/%/docker_$1.tar +cache/%/docker_$1.tar: %_$1 + @rm -f $$@ + mkdir -p cache/$$* + docker save ${IMAGE}:$$*_$1 -o $$@ + +#$$(info Create targets: $(addprefix sh_, $(addsuffix _$1, $(TOOLCHAIN_TARGETS))) (debug).) +sh_targets_toolchain_$1 = $(addprefix sh_, $(addsuffix _$1, $(TOOLCHAIN_TARGETS))) +.PHONY: $$(sh_targets_toolchain_$1) +$$(sh_targets_toolchain_$1): sh_%_$1: %_$1 + ${DOCKER_RUN_CMD} -it --name ${IMAGE}_$$*_$1 ${IMAGE}:$$*_$1 + +#$$(info Create targets: clean_toolchain_$1 $(addprefix clean_, $(addsuffix _$1, $(TOOLCHAIN_TARGETS))).) +clean_targets_toolchain_$1 = $(addprefix clean_, $(addsuffix _$1, $(TOOLCHAIN_TARGETS))) +.PHONY: clean_toolchain_$1 $$(clean_targets_toolchain_$1) +clean_toolchain_$1: $$(clean_targets_toolchain_$1) +$$(clean_targets_toolchain_$1): clean_%_$1: + docker image rm -f ${IMAGE}:$$*_$1 2>/dev/null + rm -f cache/$$*/docker_$1.tar +endef + +$(foreach stage,$(TOOLCHAIN_STAGES),$(eval $(call toolchain-stage-target,$(stage)))) + +## MERGE ## +.PHONY: clean_toolchain +clean_toolchain: $(addprefix clean_toolchain_, $(TOOLCHAIN_STAGES)) + -rmdir $(addprefix cache/, $(TOOLCHAIN_TARGETS)) + +.PHONY: env devel build test +env: amd64_env toolchain_env +devel: amd64_devel toolchain_devel +build: amd64_build toolchain_build +test: amd64_test toolchain_test + +.PHONY: install_env install_devel install_build install_test +install_env: amd64_install_env +install_devel: amd64_install_devel +install_build: amd64_install_build +install_test: amd64_install_test + +############# +## VAGRANT ## +############# +VMS = freebsd + +vms_targets = $(addsuffix _build, $(VMS)) +.PHONY: $(vms_targets) +$(vms_targets): %_build: vagrant/%/Vagrantfile + @cd vagrant/$* && vagrant destroy -f + cd vagrant/$* && vagrant up + +clean_vms_targets = $(addprefix clean_, $(VMS)) +.PHONY: clean_vms $(clean_vms_targets) +clean_vms: $(clean_vms_targets) +$(clean_vms_targets): clean_%: + cd vagrant/$* && vagrant destroy -f + -rm -rf vagrant/$*/.vagrant + +########### +## CLEAN ## +########### +.PHONY: clean +clean: clean_amd64 clean_toolchain clean_vms + docker container prune -f + docker image prune -f + -rmdir cache + +.PHONY: distclean +distclean: clean + -docker container rm -f $$(docker container ls -aq) + -docker image rm -f $$(docker image ls -aq) + -vagrant box remove -f generic/freebsd12 diff --git a/cpu_features/ci/README.md b/cpu_features/ci/README.md new file mode 100644 index 0000000..faaac59 --- /dev/null +++ b/cpu_features/ci/README.md @@ -0,0 +1,66 @@ +# GitHub-CI Status +| OS | amd64 | AArch64 | ARM | MIPS | +|:-------- | :----: | :-----: | :-: | :--: | +| FreeBSD | [![Status][freebsd_svg]][freebsd_link] | N/A | N/A | N/A | +| Linux | [![Status][linux_svg]][linux_link] | [![Status][linux_aarch64_svg]][linux_aarch64_link] | [![Status][linux_arm_svg]][linux_arm_link] | [![Status][linux_mips_svg]][linux_mips_link] | +| MacOS | [![Status][macos_svg]][macos_link] | N/A | N/A | N/A | +| Windows | [![Status][windows_svg]][windows_link] | N/A | N/A | N/A | + +[freebsd_svg]: https://github.com/google/cpu_features/actions/workflows/amd64_freebsd.yml/badge.svg?branch=master +[freebsd_link]: https://github.com/google/cpu_features/actions/workflows/amd64_freebsd.yml + +[linux_svg]: https://github.com/google/cpu_features/actions/workflows/amd64_linux.yml/badge.svg?branch=master +[linux_link]: https://github.com/google/cpu_features/actions/workflows/amd64_linux.yml +[linux_aarch64_svg]: https://github.com/google/cpu_features/actions/workflows/aarch64_linux.yml/badge.svg?branch=master +[linux_aarch64_link]: https://github.com/google/cpu_features/actions/workflows/aarch64_linux.yml +[linux_arm_svg]: https://github.com/google/cpu_features/actions/workflows/arm_linux.yml/badge.svg?branch=master +[linux_arm_link]: https://github.com/google/cpu_features/actions/workflows/arm_linux.yml +[linux_mips_svg]: https://github.com/google/cpu_features/actions/workflows/mips_linux.yml/badge.svg?branch=master +[linux_mips_link]: https://github.com/google/cpu_features/actions/workflows/mips_linux.yml + +[macos_svg]: https://github.com/google/cpu_features/actions/workflows/amd64_macos.yml/badge.svg?branch=master +[macos_link]: https://github.com/google/cpu_features/actions/workflows/amd64_macos.yml + +[windows_svg]: https://github.com/google/cpu_features/actions/workflows/amd64_windows.yml/badge.svg?branch=master +[windows_link]: https://github.com/google/cpu_features/actions/workflows/amd64_windows.yml + +## Makefile/Docker testing +To test the build on various distro, we are using docker containers and a Makefile for orchestration. + +pros: +* You are independent of third party CI runner config + (e.g. [github action virtual-environnments](https://github.com/actions/virtual-environments)). +* You can run it locally on your linux system. +* Most CI provide runners with docker and Makefile installed. + +cons: +* Only GNU/Linux distro supported. + +### Usage +To get the help simply type: +```sh +make +``` + +note: you can also use from top directory +```sh +make --directory=ci +``` + +### Example +For example to test mips32 inside an container: +```sh +make mips32_test +``` + +### Docker layers +Dockerfile is splitted in several stages. + +![docker](doc/docker.svg) + + +## Makefile/Vagrant testing +To test build for FreeBSD we are using Vagrant and VirtualBox box. + +This is similar to the docker stuff but use `vagrant` as `docker` cli and +VirtuaBox to replace the docker engine daemon. diff --git a/cpu_features/ci/doc/docker.dot b/cpu_features/ci/doc/docker.dot new file mode 100644 index 0000000..a00ef1f --- /dev/null +++ b/cpu_features/ci/doc/docker.dot @@ -0,0 +1,64 @@ +@startdot +digraph DockerDeps { + //rankdir=BT; + rankdir=TD; + node [shape=cylinder, style="rounded,filled", color=black, fillcolor=royalblue]; + DISTRO_IMG [label="ubuntu:latest"]; + PKG [label="packages\ne.g. cmake, g++", shape=box3d]; + SRC [label="git repo", shape=folder]; + SPL [label="sample", shape=folder]; + + subgraph clusterDockerfile { + ENV_IMG [label="cpu_features:amd64_env\nenv"]; + DEVEL_IMG [label="cpu_features:amd64_devel\ndevel"]; + BUILD_IMG [label="cpu_features:amd64_build\nbuild"]; + TEST_IMG [label="cpu_features:amd64_test\ntest"]; + INSTALL_ENV_IMG [label="cpu_features:amd64_install_env\ninstall_env"]; + INSTALL_DEVEL_IMG [label="cpu_features:amd64_install_devel\ninstall_devel"]; + INSTALL_BUILD_IMG [label="cpu_features:amd64_install_build\ninstall_build"]; + INSTALL_TEST_IMG [label="cpu_features:amd64_install_test\ninstall_test"]; + + ENV_IMG -> DEVEL_IMG; + DEVEL_IMG -> BUILD_IMG; + BUILD_IMG -> TEST_IMG; + + ENV_IMG -> INSTALL_ENV_IMG; + BUILD_IMG -> INSTALL_ENV_IMG [label="copy install", style="dashed"]; + INSTALL_ENV_IMG -> INSTALL_DEVEL_IMG; + SPL -> INSTALL_DEVEL_IMG [label="copy", style="dashed"]; + INSTALL_DEVEL_IMG -> INSTALL_BUILD_IMG; + INSTALL_BUILD_IMG -> INSTALL_TEST_IMG; + + color=royalblue; + label = "docker/amd64/Dockerfile"; + } + DISTRO_IMG -> ENV_IMG; + PKG -> ENV_IMG [label="install", style="dashed"]; + SRC -> DEVEL_IMG [label="copy", style="dashed"]; + + subgraph clusterCache { + node [shape=note, style="rounded,filled", color=black, fillcolor=royalblue]; + ENV_TAR [label="docker_amd64_env.tar"]; + DEVEL_TAR [label="docker_amd64_devel.tar"]; + BUILD_TAR [label="docker_amd64_build.tar"]; + TEST_TAR [label="docker_amd64_test.tar"]; + INSTALL_ENV_TAR [label="docker_amd64_install_env.tar"]; + INSTALL_DEVEL_TAR [label="docker_amd64_install_devel.tar"]; + INSTALL_BUILD_TAR [label="docker_amd64_install_build.tar"]; + INSTALL_TEST_TAR [label="docker_amd64_install_test.tar"]; + + edge [color=red]; + ENV_IMG -> ENV_TAR [label="make save_amd64_env"]; + DEVEL_IMG -> DEVEL_TAR [label="make save_amd64_devel"]; + BUILD_IMG -> BUILD_TAR [label="make save_amd64_build"]; + TEST_IMG -> TEST_TAR [label="make save_amd64_test"]; + INSTALL_ENV_IMG -> INSTALL_ENV_TAR [label="make save_amd64_install_env"]; + INSTALL_DEVEL_IMG -> INSTALL_DEVEL_TAR [label="make save_amd64_install_devel"]; + INSTALL_BUILD_IMG -> INSTALL_BUILD_TAR [label="make save_amd64_install_build"]; + INSTALL_TEST_IMG -> INSTALL_TEST_TAR [label="make save_amd64_install_test"]; + + color=royalblue; + label = "cache/amd64/"; + } +} +@enddot diff --git a/cpu_features/ci/doc/docker.svg b/cpu_features/ci/doc/docker.svg new file mode 100644 index 0000000..bd9bd6d --- /dev/null +++ b/cpu_features/ci/doc/docker.svg @@ -0,0 +1,312 @@ + + + + + + +DockerDeps + + +clusterDockerfile + +docker/amd64/Dockerfile + + +clusterCache + +cache/amd64/ + + + +DISTRO_IMG + + +ubuntu:latest + + + +ENV_IMG + + +cpu_features:amd64_env +env + + + +DISTRO_IMG->ENV_IMG + + + + + +PKG + + + + +packages +e.g. cmake, g++ + + + +PKG->ENV_IMG + + +install + + + +SRC + +git repo + + + +DEVEL_IMG + + +cpu_features:amd64_devel +devel + + + +SRC->DEVEL_IMG + + +copy + + + +SPL + +sample + + + +INSTALL_DEVEL_IMG + + +cpu_features:amd64_install_devel +install_devel + + + +SPL->INSTALL_DEVEL_IMG + + +copy + + + +ENV_IMG->DEVEL_IMG + + + + + +INSTALL_ENV_IMG + + +cpu_features:amd64_install_env +install_env + + + +ENV_IMG->INSTALL_ENV_IMG + + + + + +ENV_TAR + + + +docker_amd64_env.tar + + + +ENV_IMG->ENV_TAR + + +make save_amd64_env + + + +BUILD_IMG + + +cpu_features:amd64_build +build + + + +DEVEL_IMG->BUILD_IMG + + + + + +DEVEL_TAR + + + +docker_amd64_devel.tar + + + +DEVEL_IMG->DEVEL_TAR + + +make save_amd64_devel + + + +TEST_IMG + + +cpu_features:amd64_test +test + + + +BUILD_IMG->TEST_IMG + + + + + +BUILD_IMG->INSTALL_ENV_IMG + + +copy install + + + +BUILD_TAR + + + +docker_amd64_build.tar + + + +BUILD_IMG->BUILD_TAR + + +make save_amd64_build + + + +TEST_TAR + + + +docker_amd64_test.tar + + + +TEST_IMG->TEST_TAR + + +make save_amd64_test + + + +INSTALL_ENV_IMG->INSTALL_DEVEL_IMG + + + + + +INSTALL_ENV_TAR + + + +docker_amd64_install_env.tar + + + +INSTALL_ENV_IMG->INSTALL_ENV_TAR + + +make save_amd64_install_env + + + +INSTALL_BUILD_IMG + + +cpu_features:amd64_install_build +install_build + + + +INSTALL_DEVEL_IMG->INSTALL_BUILD_IMG + + + + + +INSTALL_DEVEL_TAR + + + +docker_amd64_install_devel.tar + + + +INSTALL_DEVEL_IMG->INSTALL_DEVEL_TAR + + +make save_amd64_install_devel + + + +INSTALL_TEST_IMG + + +cpu_features:amd64_install_test +install_test + + + +INSTALL_BUILD_IMG->INSTALL_TEST_IMG + + + + + +INSTALL_BUILD_TAR + + + +docker_amd64_install_build.tar + + + +INSTALL_BUILD_IMG->INSTALL_BUILD_TAR + + +make save_amd64_install_build + + + +INSTALL_TEST_TAR + + + +docker_amd64_install_test.tar + + + +INSTALL_TEST_IMG->INSTALL_TEST_TAR + + +make save_amd64_install_test + + + diff --git a/cpu_features/ci/doc/generate_image.sh b/cpu_features/ci/doc/generate_image.sh new file mode 100755 index 0000000..15f1774 --- /dev/null +++ b/cpu_features/ci/doc/generate_image.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -ex + +rm -f ./*.svg ./*.png +for i in *.dot; do + plantuml -Tsvg "$i"; +done diff --git a/cpu_features/ci/docker/amd64/Dockerfile b/cpu_features/ci/docker/amd64/Dockerfile new file mode 100644 index 0000000..9b25e28 --- /dev/null +++ b/cpu_features/ci/docker/amd64/Dockerfile @@ -0,0 +1,48 @@ +# Create a virtual environment with all tools installed +# ref: https://hub.docker.com/_/ubuntu +FROM ubuntu:latest AS env +LABEL maintainer="corentinl@google.com" +# Install system build dependencies +ENV PATH=/usr/local/bin:$PATH +RUN apt-get update -qq \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -yq git wget libssl-dev build-essential \ + ninja-build python3 pkgconf libglib2.0-dev \ +&& apt-get clean \ +&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* +ENTRYPOINT ["/usr/bin/bash", "-c"] +CMD ["/usr/bin/bash"] + +# Install CMake 3.21.3 +RUN wget "https://cmake.org/files/v3.21/cmake-3.21.3-linux-x86_64.sh" \ +&& chmod a+x cmake-3.21.3-linux-x86_64.sh \ +&& ./cmake-3.21.3-linux-x86_64.sh --prefix=/usr/local/ --skip-license \ +&& rm cmake-3.21.3-linux-x86_64.sh + +FROM env AS devel +WORKDIR /home/project +COPY . . + +FROM devel AS build +RUN cmake -version +RUN cmake -S. -Bbuild +RUN cmake --build build --target all -v +RUN cmake --build build --target install -v + +FROM build AS test +ENV CTEST_OUTPUT_ON_FAILURE=1 +RUN cmake --build build --target test -v + +# Test install rules +FROM env AS install_env +COPY --from=build /usr/local /usr/local/ + +FROM install_env AS install_devel +WORKDIR /home/sample +COPY ci/sample . + +FROM install_devel AS install_build +RUN cmake -S. -Bbuild +RUN cmake --build build --target all -v + +FROM install_build AS install_test +RUN cmake --build build --target test diff --git a/cpu_features/ci/docker/toolchain/Dockerfile b/cpu_features/ci/docker/toolchain/Dockerfile new file mode 100644 index 0000000..1bf25ed --- /dev/null +++ b/cpu_features/ci/docker/toolchain/Dockerfile @@ -0,0 +1,34 @@ +# Create a virtual environment with all tools installed +# ref: https://hub.docker.com/_/ubuntu +FROM ubuntu:latest AS env +LABEL maintainer="corentinl@google.com" +# Install system build dependencies +ENV PATH=/usr/local/bin:$PATH +RUN apt-get update -qq \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -yq git wget libssl-dev build-essential \ + ninja-build python3 pkgconf libglib2.0-dev \ +&& apt-get clean \ +&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* +ENTRYPOINT ["/usr/bin/bash", "-c"] +CMD ["/usr/bin/bash"] + +# Install CMake 3.21.3 +RUN wget "https://cmake.org/files/v3.21/cmake-3.21.3-linux-x86_64.sh" \ +&& chmod a+x cmake-3.21.3-linux-x86_64.sh \ +&& ./cmake-3.21.3-linux-x86_64.sh --prefix=/usr/local/ --skip-license \ +&& rm cmake-3.21.3-linux-x86_64.sh + +FROM env AS devel +WORKDIR /home/project +COPY . . + +ARG TARGET +ENV TARGET ${TARGET:-unknown} + +FROM devel AS build +RUN cmake -version +RUN ./scripts/run_integration.sh build + +FROM build AS test +RUN ./scripts/run_integration.sh qemu +RUN ./scripts/run_integration.sh test diff --git a/cpu_features/ci/sample/CMakeLists.txt b/cpu_features/ci/sample/CMakeLists.txt new file mode 100644 index 0000000..b60e92f --- /dev/null +++ b/cpu_features/ci/sample/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.15) +project(Sample VERSION 1.0.0 LANGUAGES CXX) + +include(CTest) +find_package(CpuFeatures REQUIRED) + +add_executable(sample main.cpp) +target_compile_features(sample PUBLIC cxx_std_11) +set_target_properties(sample PROPERTIES + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED ON + VERSION ${PROJECT_VERSION}) +target_link_libraries(sample PRIVATE CpuFeatures::cpu_features) + +if(BUILD_TESTING) + add_test(NAME sample_test COMMAND sample) +endif() + +include(GNUInstallDirs) +install(TARGETS sample + EXPORT SampleTargets + DESTINATION ${CMAKE_INSTALL_BIN_DIR}) diff --git a/cpu_features/ci/sample/main.cpp b/cpu_features/ci/sample/main.cpp new file mode 100644 index 0000000..45ec651 --- /dev/null +++ b/cpu_features/ci/sample/main.cpp @@ -0,0 +1,11 @@ +#include + +#include "cpuinfo_x86.h" + +using namespace cpu_features; + +int main(int /*argc*/, char** /*argv*/) { + static const X86Features features = GetX86Info().features; + std::cout << std::endl; + return 0; +} diff --git a/cpu_features/ci/vagrant/freebsd/Vagrantfile b/cpu_features/ci/vagrant/freebsd/Vagrantfile new file mode 100644 index 0000000..7ef7bfa --- /dev/null +++ b/cpu_features/ci/vagrant/freebsd/Vagrantfile @@ -0,0 +1,102 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +# All Vagrant configuration is done below. The "2" in Vagrant.configure +# configures the configuration version (we support older styles for +# backwards compatibility). Please don't change it unless you know what +# you're doing. +Vagrant.configure("2") do |config| + # The most common configuration options are documented and commented below. + # For a complete reference, please see the online documentation at + # https://docs.vagrantup.com. + + # Every Vagrant development environment requires a box. You can search for + # boxes at https://vagrantcloud.com/search. + config.vm.guest = :freebsd + config.vm.box = "generic/freebsd12" + + config.ssh.shell = "sh" + + # Disable automatic box update checking. If you disable this, then + # boxes will only be checked for updates when the user runs + # `vagrant box outdated`. This is not recommended. + # config.vm.box_check_update = false + + # Create a forwarded port mapping which allows access to a specific port + # within the machine from a port on the host machine. In the example below, + # accessing "localhost:8080" will access port 80 on the guest machine. + # NOTE: This will enable public access to the opened port + # config.vm.network "forwarded_port", guest: 80, host: 8080 + + # Create a forwarded port mapping which allows access to a specific port + # within the machine from a port on the host machine and only allow access + # via 127.0.0.1 to disable public access + # config.vm.network "forwarded_port", guest: 80, host: 8080, host_ip: "127.0.0.1" + + # Create a private network, which allows host-only access to the machine + # using a specific IP. + # config.vm.network "private_network", ip: "192.168.33.10" + + # Create a public network, which generally matched to bridged network. + # Bridged networks make the machine appear as another physical device on + # your network. + # config.vm.network "public_network" + + # Share an additional folder to the guest VM. The first argument is + # the path on the host to the actual folder. The second argument is + # the path on the guest to mount the folder. And the optional third + # argument is a set of non-required options. + #config.vm.synced_folder "../../..", "/home/vagrant/project" + config.vm.synced_folder ".", "/vagrant", id: "vagrant-root", disabled: true + + config.vm.provision "file", source: "../../../CMakeLists.txt", destination: "$HOME/project/" + config.vm.provision "file", source: "../../../cmake", destination: "$HOME/project/" + config.vm.provision "file", source: "../../../include", destination: "$HOME/project/" + config.vm.provision "file", source: "../../../src", destination: "$HOME/project/" + config.vm.provision "file", source: "../../../test", destination: "$HOME/project/" + + # Provider-specific configuration so you can fine-tune various + # backing providers for Vagrant. These expose provider-specific options. + # Example for VirtualBox: + # + # config.vm.provider "virtualbox" do |vb| + # # Display the VirtualBox GUI when booting the machine + # vb.gui = true + # + # # Customize the amount of memory on the VM: + # vb.memory = "1024" + # end + # + # View the documentation for the provider you are using for more + # information on available options. + + # Enable provisioning with a shell script. Additional provisioners such as + # Ansible, Chef, Docker, Puppet and Salt are also available. Please see the + # documentation for more information about their specific syntax and use. + # note: clang installed by default + config.vm.provision "env", type: "shell", inline:<<-SHELL + set -x + pkg update -f + pkg install -y git cmake + SHELL + config.vm.provision "devel", type: "shell", inline:<<-SHELL + set -x + cd project + ls + SHELL + config.vm.provision "configure", type: "shell", inline:<<-SHELL + set -x + cd project + cmake -S. -Bbuild -DBUILD_TESTING=ON + SHELL + config.vm.provision "build", type: "shell", inline:<<-SHELL + set -x + cd project + cmake --build build -v + SHELL + config.vm.provision "test", type: "shell", inline:<<-SHELL + set -x + cd project + cmake --build build --target test -v + SHELL +end diff --git a/cpu_features/cmake/CpuFeaturesConfig.cmake.in b/cpu_features/cmake/CpuFeaturesConfig.cmake.in new file mode 100644 index 0000000..e0bf10e --- /dev/null +++ b/cpu_features/cmake/CpuFeaturesConfig.cmake.in @@ -0,0 +1,3 @@ +# CpuFeatures CMake configuration file + +include("${CMAKE_CURRENT_LIST_DIR}/CpuFeaturesTargets.cmake") diff --git a/cpu_features/cmake/CpuFeaturesNdkCompatConfig.cmake.in b/cpu_features/cmake/CpuFeaturesNdkCompatConfig.cmake.in new file mode 100644 index 0000000..5a53ffd --- /dev/null +++ b/cpu_features/cmake/CpuFeaturesNdkCompatConfig.cmake.in @@ -0,0 +1,3 @@ +# CpuFeaturesNdkCompat CMake configuration file + +include("${CMAKE_CURRENT_LIST_DIR}/CpuFeaturesNdkCompatTargets.cmake") diff --git a/cpu_features/cmake/README.md b/cpu_features/cmake/README.md new file mode 100644 index 0000000..495b838 --- /dev/null +++ b/cpu_features/cmake/README.md @@ -0,0 +1,30 @@ +# CMake build instructions + +## Recommended usage : Incorporating cpu_features into a CMake project + +For API / ABI compatibility reasons, it is recommended to build and use +cpu_features in a subdirectory of your project or as an embedded dependency. + +This is similar to the recommended usage of the googletest framework +( https://github.com/google/googletest/blob/master/googletest/README.md ) + +Build and use step-by-step + + +1- Download cpu_features and copy it in a sub-directory in your project. +or add cpu_features as a git-submodule in your project + +2- You can then use the cmake command `add_subdirectory()` to include +cpu_features directly and use the `cpu_features` target in your project. + +3- Add the `cpu_features` target to the `target_link_libraries()` section of +your executable or of your library. + +## Disabling tests + +CMake default options for cpu_features is `Release` built type with tests +enabled. To disable testing set cmake `BUILD_TESTING` variable to `OFF`. +e.g. +```sh +cmake -S. -Bbuild -DBUILD_TESTING=OFF +``` diff --git a/cpu_features/cmake/googletest.CMakeLists.txt.in b/cpu_features/cmake/googletest.CMakeLists.txt.in new file mode 100644 index 0000000..8003c2c --- /dev/null +++ b/cpu_features/cmake/googletest.CMakeLists.txt.in @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 2.8.2) + +project(googletest-download NONE) + +include(ExternalProject) +ExternalProject_Add(googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG main + SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src" + BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) \ No newline at end of file diff --git a/cpu_features/include/cpu_features_cache_info.h b/cpu_features/include/cpu_features_cache_info.h new file mode 100644 index 0000000..1a61ee1 --- /dev/null +++ b/cpu_features/include/cpu_features_cache_info.h @@ -0,0 +1,54 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_COMMON_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_COMMON_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef enum { + CPU_FEATURE_CACHE_NULL = 0, + CPU_FEATURE_CACHE_DATA = 1, + CPU_FEATURE_CACHE_INSTRUCTION = 2, + CPU_FEATURE_CACHE_UNIFIED = 3, + CPU_FEATURE_CACHE_TLB = 4, + CPU_FEATURE_CACHE_DTLB = 5, + CPU_FEATURE_CACHE_STLB = 6, + CPU_FEATURE_CACHE_PREFETCH = 7 +} CacheType; + +typedef struct { + int level; + CacheType cache_type; + int cache_size; // Cache size in bytes + int ways; // Associativity, 0 undefined, 0xFF fully associative + int line_size; // Cache line size in bytes + int tlb_entries; // number of entries for TLB + int partitioning; // number of lines per sector +} CacheLevelInfo; + +// Increase this value if more cache levels are needed. +#ifndef CPU_FEATURES_MAX_CACHE_LEVEL +#define CPU_FEATURES_MAX_CACHE_LEVEL 10 +#endif +typedef struct { + int size; + CacheLevelInfo levels[CPU_FEATURES_MAX_CACHE_LEVEL]; +} CacheInfo; + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_COMMON_H_ diff --git a/cpu_features/include/cpu_features_macros.h b/cpu_features/include/cpu_features_macros.h new file mode 100644 index 0000000..8f5c38c --- /dev/null +++ b/cpu_features/include/cpu_features_macros.h @@ -0,0 +1,242 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ +#define CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ + +//////////////////////////////////////////////////////////////////////////////// +// Architectures +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__pnacl__) || defined(__CLR_VER) +#define CPU_FEATURES_ARCH_VM +#endif + +#if (defined(_M_IX86) || defined(__i386__)) && !defined(CPU_FEATURES_ARCH_VM) +#define CPU_FEATURES_ARCH_X86_32 +#endif + +#if (defined(_M_X64) || defined(__x86_64__)) && !defined(CPU_FEATURES_ARCH_VM) +#define CPU_FEATURES_ARCH_X86_64 +#endif + +#if defined(CPU_FEATURES_ARCH_X86_32) || defined(CPU_FEATURES_ARCH_X86_64) +#define CPU_FEATURES_ARCH_X86 +#endif + +#if (defined(__arm__) || defined(_M_ARM)) +#define CPU_FEATURES_ARCH_ARM +#endif + +#if defined(__aarch64__) +#define CPU_FEATURES_ARCH_AARCH64 +#endif + +#if (defined(CPU_FEATURES_ARCH_AARCH64) || defined(CPU_FEATURES_ARCH_ARM)) +#define CPU_FEATURES_ARCH_ANY_ARM +#endif + +#if defined(__mips64) +#define CPU_FEATURES_ARCH_MIPS64 +#endif + +#if defined(__mips__) && !defined(__mips64) // mips64 also declares __mips__ +#define CPU_FEATURES_ARCH_MIPS32 +#endif + +#if defined(CPU_FEATURES_ARCH_MIPS32) || defined(CPU_FEATURES_ARCH_MIPS64) +#define CPU_FEATURES_ARCH_MIPS +#endif + +#if defined(__powerpc__) +#define CPU_FEATURES_ARCH_PPC +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Os +//////////////////////////////////////////////////////////////////////////////// + +#if (defined(__freebsd__) || defined(__FreeBSD__)) +#define CPU_FEATURES_OS_FREEBSD +#endif + +#if defined(__ANDROID__) +#define CPU_FEATURES_OS_ANDROID +#endif + +#if defined(__linux__) && !defined(CPU_FEATURES_OS_FREEBSD) && \ + !defined(CPU_FEATURES_OS_ANDROID) +#define CPU_FEATURES_OS_LINUX +#endif + +#if (defined(_WIN64) || defined(_WIN32)) +#define CPU_FEATURES_OS_WINDOWS +#endif + +#if (defined(__apple__) || defined(__APPLE__) || defined(__MACH__)) +// From https://stackoverflow.com/a/49560690 +#include "TargetConditionals.h" +#if defined(TARGET_OS_OSX) +#define CPU_FEATURES_OS_MACOS +#endif +#if defined(TARGET_OS_IPHONE) +// This is set for any non-Mac Apple products (IOS, TV, WATCH) +#define CPU_FEATURES_OS_IPHONE +#endif +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Compilers +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__clang__) +#define CPU_FEATURES_COMPILER_CLANG +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#define CPU_FEATURES_COMPILER_GCC +#endif + +#if defined(_MSC_VER) +#define CPU_FEATURES_COMPILER_MSC +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Cpp +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__cplusplus) +#define CPU_FEATURES_START_CPP_NAMESPACE \ + namespace cpu_features { \ + extern "C" { +#define CPU_FEATURES_END_CPP_NAMESPACE \ + } \ + } +#else +#define CPU_FEATURES_START_CPP_NAMESPACE +#define CPU_FEATURES_END_CPP_NAMESPACE +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Compiler flags +//////////////////////////////////////////////////////////////////////////////// + +// Use the following to check if a feature is known to be available at +// compile time. See README.md for an example. +#if defined(CPU_FEATURES_ARCH_X86) + +#if defined(__AES__) +#define CPU_FEATURES_COMPILED_X86_AES 1 +#else +#define CPU_FEATURES_COMPILED_X86_AES 0 +#endif // defined(__AES__) + +#if defined(__F16C__) +#define CPU_FEATURES_COMPILED_X86_F16C 1 +#else +#define CPU_FEATURES_COMPILED_X86_F16C 0 +#endif // defined(__F16C__) + +#if defined(__BMI__) +#define CPU_FEATURES_COMPILED_X86_BMI 1 +#else +#define CPU_FEATURES_COMPILED_X86_BMI 0 +#endif // defined(__BMI__) + +#if defined(__BMI2__) +#define CPU_FEATURES_COMPILED_X86_BMI2 1 +#else +#define CPU_FEATURES_COMPILED_X86_BMI2 0 +#endif // defined(__BMI2__) + +#if (defined(__SSE__) || (_M_IX86_FP >= 1)) +#define CPU_FEATURES_COMPILED_X86_SSE 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE 0 +#endif + +#if (defined(__SSE2__) || (_M_IX86_FP >= 2)) +#define CPU_FEATURES_COMPILED_X86_SSE2 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE2 0 +#endif + +#if defined(__SSE3__) +#define CPU_FEATURES_COMPILED_X86_SSE3 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE3 0 +#endif // defined(__SSE3__) + +#if defined(__SSSE3__) +#define CPU_FEATURES_COMPILED_X86_SSSE3 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSSE3 0 +#endif // defined(__SSSE3__) + +#if defined(__SSE4_1__) +#define CPU_FEATURES_COMPILED_X86_SSE4_1 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE4_1 0 +#endif // defined(__SSE4_1__) + +#if defined(__SSE4_2__) +#define CPU_FEATURES_COMPILED_X86_SSE4_2 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE4_2 0 +#endif // defined(__SSE4_2__) + +#if defined(__AVX__) +#define CPU_FEATURES_COMPILED_X86_AVX 1 +#else +#define CPU_FEATURES_COMPILED_X86_AVX 0 +#endif // defined(__AVX__) + +#if defined(__AVX2__) +#define CPU_FEATURES_COMPILED_X86_AVX2 1 +#else +#define CPU_FEATURES_COMPILED_X86_AVX2 0 +#endif // defined(__AVX2__) + +#endif // defined(CPU_FEATURES_ARCH_X86) + +#if defined(CPU_FEATURES_ARCH_ANY_ARM) +#if defined(__ARM_NEON__) +#define CPU_FEATURES_COMPILED_ANY_ARM_NEON 1 +#else +#define CPU_FEATURES_COMPILED_ANY_ARM_NEON 0 +#endif // defined(__ARM_NEON__) +#endif // defined(CPU_FEATURES_ARCH_ANY_ARM) + +#if defined(CPU_FEATURES_ARCH_MIPS) +#if defined(__mips_msa) +#define CPU_FEATURES_COMPILED_MIPS_MSA 1 +#else +#define CPU_FEATURES_COMPILED_MIPS_MSA 0 +#endif // defined(__mips_msa) +#endif // defined(CPU_FEATURES_ARCH_MIPS) + +//////////////////////////////////////////////////////////////////////////////// +// Utils +//////////////////////////////////////////////////////////////////////////////// + +// Communicates to the compiler that the block is unreachable +#if defined(CPU_FEATURES_COMPILER_CLANG) || defined(CPU_FEATURES_COMPILER_GCC) +#define UNREACHABLE() __builtin_unreachable() +#elif defined(CPU_FEATURES_COMPILER_MSC) +#define UNREACHABLE() __assume(0) +#else +#define UNREACHABLE() +#endif + +#endif // CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ diff --git a/cpu_features/include/cpuinfo_aarch64.h b/cpu_features/include/cpuinfo_aarch64.h new file mode 100644 index 0000000..1b57d21 --- /dev/null +++ b/cpu_features/include/cpuinfo_aarch64.h @@ -0,0 +1,158 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ + +#include "cpu_features_cache_info.h" +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int fp : 1; // Floating-point. + int asimd : 1; // Advanced SIMD. + int evtstrm : 1; // Generic timer generated events. + int aes : 1; // Hardware-accelerated Advanced Encryption Standard. + int pmull : 1; // Polynomial multiply long. + int sha1 : 1; // Hardware-accelerated SHA1. + int sha2 : 1; // Hardware-accelerated SHA2-256. + int crc32 : 1; // Hardware-accelerated CRC-32. + int atomics : 1; // Armv8.1 atomic instructions. + int fphp : 1; // Half-precision floating point support. + int asimdhp : 1; // Advanced SIMD half-precision support. + int cpuid : 1; // Access to certain ID registers. + int asimdrdm : 1; // Rounding Double Multiply Accumulate/Subtract. + int jscvt : 1; // Support for JavaScript conversion. + int fcma : 1; // Floating point complex numbers. + int lrcpc : 1; // Support for weaker release consistency. + int dcpop : 1; // Data persistence writeback. + int sha3 : 1; // Hardware-accelerated SHA3. + int sm3 : 1; // Hardware-accelerated SM3. + int sm4 : 1; // Hardware-accelerated SM4. + int asimddp : 1; // Dot product instruction. + int sha512 : 1; // Hardware-accelerated SHA512. + int sve : 1; // Scalable Vector Extension. + int asimdfhm : 1; // Additional half-precision instructions. + int dit : 1; // Data independent timing. + int uscat : 1; // Unaligned atomics support. + int ilrcpc : 1; // Additional support for weaker release consistency. + int flagm : 1; // Flag manipulation instructions. + int ssbs : 1; // Speculative Store Bypass Safe PSTATE bit. + int sb : 1; // Speculation barrier. + int paca : 1; // Address authentication. + int pacg : 1; // Generic authentication. + int dcpodp : 1; // Data cache clean to point of persistence. + int sve2 : 1; // Scalable Vector Extension (version 2). + int sveaes : 1; // SVE AES instructions. + int svepmull : 1; // SVE polynomial multiply long instructions. + int svebitperm : 1; // SVE bit permute instructions. + int svesha3 : 1; // SVE SHA3 instructions. + int svesm4 : 1; // SVE SM4 instructions. + int flagm2 : 1; // Additional flag manipulation instructions. + int frint : 1; // Floating point to integer rounding. + int svei8mm : 1; // SVE Int8 matrix multiplication instructions. + int svef32mm : 1; // SVE FP32 matrix multiplication instruction. + int svef64mm : 1; // SVE FP64 matrix multiplication instructions. + int svebf16 : 1; // SVE BFloat16 instructions. + int i8mm : 1; // Int8 matrix multiplication instructions. + int bf16 : 1; // BFloat16 instructions. + int dgh : 1; // Data Gathering Hint instruction. + int rng : 1; // True random number generator support. + int bti : 1; // Branch target identification. + int mte : 1; // Memory tagging extension. + + // Make sure to update Aarch64FeaturesEnum below if you add a field here. +} Aarch64Features; + +typedef struct { + Aarch64Features features; + int implementer; + int variant; + int part; + int revision; +} Aarch64Info; + +Aarch64Info GetAarch64Info(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + AARCH64_FP, + AARCH64_ASIMD, + AARCH64_EVTSTRM, + AARCH64_AES, + AARCH64_PMULL, + AARCH64_SHA1, + AARCH64_SHA2, + AARCH64_CRC32, + AARCH64_ATOMICS, + AARCH64_FPHP, + AARCH64_ASIMDHP, + AARCH64_CPUID, + AARCH64_ASIMDRDM, + AARCH64_JSCVT, + AARCH64_FCMA, + AARCH64_LRCPC, + AARCH64_DCPOP, + AARCH64_SHA3, + AARCH64_SM3, + AARCH64_SM4, + AARCH64_ASIMDDP, + AARCH64_SHA512, + AARCH64_SVE, + AARCH64_ASIMDFHM, + AARCH64_DIT, + AARCH64_USCAT, + AARCH64_ILRCPC, + AARCH64_FLAGM, + AARCH64_SSBS, + AARCH64_SB, + AARCH64_PACA, + AARCH64_PACG, + AARCH64_DCPODP, + AARCH64_SVE2, + AARCH64_SVEAES, + AARCH64_SVEPMULL, + AARCH64_SVEBITPERM, + AARCH64_SVESHA3, + AARCH64_SVESM4, + AARCH64_FLAGM2, + AARCH64_FRINT, + AARCH64_SVEI8MM, + AARCH64_SVEF32MM, + AARCH64_SVEF64MM, + AARCH64_SVEBF16, + AARCH64_I8MM, + AARCH64_BF16, + AARCH64_DGH, + AARCH64_RNG, + AARCH64_BTI, + AARCH64_MTE, + AARCH64_LAST_, +} Aarch64FeaturesEnum; + +int GetAarch64FeaturesEnumValue(const Aarch64Features* features, + Aarch64FeaturesEnum value); + +const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#if !defined(CPU_FEATURES_ARCH_AARCH64) +#error "Including cpuinfo_aarch64.h from a non-aarch64 target." +#endif + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ diff --git a/cpu_features/include/cpuinfo_arm.h b/cpu_features/include/cpuinfo_arm.h new file mode 100644 index 0000000..0952d7c --- /dev/null +++ b/cpu_features/include/cpuinfo_arm.h @@ -0,0 +1,121 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ + +#include // uint32_t + +#include "cpu_features_cache_info.h" +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int swp : 1; // SWP instruction (atomic read-modify-write) + int half : 1; // Half-word loads and stores + int thumb : 1; // Thumb (16-bit instruction set) + int _26bit : 1; // "26 Bit" Model (Processor status register folded into + // program counter) + int fastmult : 1; // 32x32->64-bit multiplication + int fpa : 1; // Floating point accelerator + int vfp : 1; // Vector Floating Point. + int edsp : 1; // DSP extensions (the 'e' variant of the ARM9 CPUs, and all + // others above) + int java : 1; // Jazelle (Java bytecode accelerator) + int iwmmxt : 1; // Intel Wireless MMX Technology. + int crunch : 1; // MaverickCrunch coprocessor + int thumbee : 1; // ThumbEE + int neon : 1; // Advanced SIMD. + int vfpv3 : 1; // VFP version 3 + int vfpv3d16 : 1; // VFP version 3 with 16 D-registers + int tls : 1; // TLS register + int vfpv4 : 1; // VFP version 4 with fast context switching + int idiva : 1; // SDIV and UDIV hardware division in ARM mode. + int idivt : 1; // SDIV and UDIV hardware division in Thumb mode. + int vfpd32 : 1; // VFP with 32 D-registers + int lpae : 1; // Large Physical Address Extension (>4GB physical memory on + // 32-bit architecture) + int evtstrm : 1; // kernel event stream using generic architected timer + int aes : 1; // Hardware-accelerated Advanced Encryption Standard. + int pmull : 1; // Polynomial multiply long. + int sha1 : 1; // Hardware-accelerated SHA1. + int sha2 : 1; // Hardware-accelerated SHA2-256. + int crc32 : 1; // Hardware-accelerated CRC-32. + + // Make sure to update ArmFeaturesEnum below if you add a field here. +} ArmFeatures; + +typedef struct { + ArmFeatures features; + int implementer; + int architecture; + int variant; + int part; + int revision; +} ArmInfo; + +// TODO(user): Add macros to know which features are present at compile +// time. + +ArmInfo GetArmInfo(void); + +// Compute CpuId from ArmInfo. +uint32_t GetArmCpuId(const ArmInfo* const info); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + ARM_SWP, + ARM_HALF, + ARM_THUMB, + ARM_26BIT, + ARM_FASTMULT, + ARM_FPA, + ARM_VFP, + ARM_EDSP, + ARM_JAVA, + ARM_IWMMXT, + ARM_CRUNCH, + ARM_THUMBEE, + ARM_NEON, + ARM_VFPV3, + ARM_VFPV3D16, + ARM_TLS, + ARM_VFPV4, + ARM_IDIVA, + ARM_IDIVT, + ARM_VFPD32, + ARM_LPAE, + ARM_EVTSTRM, + ARM_AES, + ARM_PMULL, + ARM_SHA1, + ARM_SHA2, + ARM_CRC32, + ARM_LAST_, +} ArmFeaturesEnum; + +int GetArmFeaturesEnumValue(const ArmFeatures* features, ArmFeaturesEnum value); + +const char* GetArmFeaturesEnumName(ArmFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#if !defined(CPU_FEATURES_ARCH_ARM) +#error "Including cpuinfo_arm.h from a non-arm target." +#endif + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ diff --git a/cpu_features/include/cpuinfo_mips.h b/cpu_features/include/cpuinfo_mips.h new file mode 100644 index 0000000..9e5e7fc --- /dev/null +++ b/cpu_features/include/cpuinfo_mips.h @@ -0,0 +1,60 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ + +#include "cpu_features_cache_info.h" +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int msa : 1; // MIPS SIMD Architecture + // https://www.mips.com/products/architectures/ase/simd/ + int eva : 1; // Enhanced Virtual Addressing + // https://www.mips.com/products/architectures/mips64/ + int r6 : 1; // True if is release 6 of the processor. + + // Make sure to update MipsFeaturesEnum below if you add a field here. +} MipsFeatures; + +typedef struct { + MipsFeatures features; +} MipsInfo; + +MipsInfo GetMipsInfo(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + MIPS_MSA, + MIPS_EVA, + MIPS_R6, + MIPS_LAST_, +} MipsFeaturesEnum; + +int GetMipsFeaturesEnumValue(const MipsFeatures* features, + MipsFeaturesEnum value); + +const char* GetMipsFeaturesEnumName(MipsFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#if !defined(CPU_FEATURES_ARCH_MIPS) +#error "Including cpuinfo_mips.h from a non-mips target." +#endif + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ diff --git a/cpu_features/include/cpuinfo_ppc.h b/cpu_features/include/cpuinfo_ppc.h new file mode 100644 index 0000000..da3e696 --- /dev/null +++ b/cpu_features/include/cpuinfo_ppc.h @@ -0,0 +1,149 @@ +// Copyright 2018 IBM +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ + +#include "cpu_features_cache_info.h" +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int ppc32 : 1; + int ppc64 : 1; + int ppc601 : 1; + int altivec : 1; + int fpu : 1; + int mmu : 1; + int mac_4xx : 1; + int unifiedcache : 1; + int spe : 1; + int efpsingle : 1; + int efpdouble : 1; + int no_tb : 1; + int power4 : 1; + int power5 : 1; + int power5plus : 1; + int cell : 1; + int booke : 1; + int smt : 1; + int icachesnoop : 1; + int arch205 : 1; + int pa6t : 1; + int dfp : 1; + int power6ext : 1; + int arch206 : 1; + int vsx : 1; + int pseries_perfmon_compat : 1; + int truele : 1; + int ppcle : 1; + int arch207 : 1; + int htm : 1; + int dscr : 1; + int ebb : 1; + int isel : 1; + int tar : 1; + int vcrypto : 1; + int htm_nosc : 1; + int arch300 : 1; + int ieee128 : 1; + int darn : 1; + int scv : 1; + int htm_no_suspend : 1; + + // Make sure to update PPCFeaturesEnum below if you add a field here. +} PPCFeatures; + +typedef struct { + PPCFeatures features; +} PPCInfo; + +PPCInfo GetPPCInfo(void); + +typedef struct { + char platform[64]; // 0 terminated string + char base_platform[64]; // 0 terminated string +} PPCPlatformTypeStrings; + +typedef struct { + char platform[64]; // 0 terminated string + char model[64]; // 0 terminated string + char machine[64]; // 0 terminated string + char cpu[64]; // 0 terminated string + PPCPlatformTypeStrings type; +} PPCPlatformStrings; + +PPCPlatformStrings GetPPCPlatformStrings(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + PPC_32, /* 32 bit mode execution */ + PPC_64, /* 64 bit mode execution */ + PPC_601_INSTR, /* Old POWER ISA */ + PPC_HAS_ALTIVEC, /* SIMD Unit*/ + PPC_HAS_FPU, /* Floating Point Unit */ + PPC_HAS_MMU, /* Memory management unit */ + PPC_HAS_4xxMAC, + PPC_UNIFIED_CACHE, /* Unified instruction and data cache */ + PPC_HAS_SPE, /* Signal processing extention unit */ + PPC_HAS_EFP_SINGLE, /* SPE single precision fpu */ + PPC_HAS_EFP_DOUBLE, /* SPE double precision fpu */ + PPC_NO_TB, /* No timebase */ + PPC_POWER4, + PPC_POWER5, + PPC_POWER5_PLUS, + PPC_CELL, /* Cell broadband engine */ + PPC_BOOKE, /* Embedded ISA */ + PPC_SMT, /* Simultaneous multi-threading */ + PPC_ICACHE_SNOOP, + PPC_ARCH_2_05, /* ISA 2.05 - POWER6 */ + PPC_PA6T, /* PA Semi 6T core ISA */ + PPC_HAS_DFP, /* Decimal floating point unit */ + PPC_POWER6_EXT, + PPC_ARCH_2_06, /* ISA 2.06 - POWER7 */ + PPC_HAS_VSX, /* Vector-scalar extension */ + PPC_PSERIES_PERFMON_COMPAT, /* Set of backwards compatibile performance + monitoring events */ + PPC_TRUE_LE, + PPC_PPC_LE, + PPC_ARCH_2_07, /* ISA 2.07 - POWER8 */ + PPC_HTM, /* Hardware Transactional Memory */ + PPC_DSCR, /* Data stream control register */ + PPC_EBB, /* Event base branching */ + PPC_ISEL, /* Integer select instructions */ + PPC_TAR, /* Target address register */ + PPC_VEC_CRYPTO, /* Vector cryptography instructions */ + PPC_HTM_NOSC, /* Transactions aborted when syscall made*/ + PPC_ARCH_3_00, /* ISA 3.00 - POWER9 */ + PPC_HAS_IEEE128, /* VSX IEEE Binary Float 128-bit */ + PPC_DARN, /* Deliver a random number instruction */ + PPC_SCV, /* scv syscall */ + PPC_HTM_NO_SUSPEND, /* TM w/out suspended state */ + PPC_LAST_, +} PPCFeaturesEnum; + +int GetPPCFeaturesEnumValue(const PPCFeatures* features, PPCFeaturesEnum value); + +const char* GetPPCFeaturesEnumName(PPCFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#if !defined(CPU_FEATURES_ARCH_PPC) +#error "Including cpuinfo_ppc.h from a non-ppc target." +#endif + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ diff --git a/cpu_features/include/cpuinfo_x86.h b/cpu_features/include/cpuinfo_x86.h new file mode 100644 index 0000000..adb1189 --- /dev/null +++ b/cpu_features/include/cpuinfo_x86.h @@ -0,0 +1,255 @@ +// Copyright 2017 Google LLC +// Copyright 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ + +#include "cpu_features_cache_info.h" +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// CPUID Vendors +#define CPU_FEATURES_VENDOR_GENUINE_INTEL "GenuineIntel" +#define CPU_FEATURES_VENDOR_AUTHENTIC_AMD "AuthenticAMD" +#define CPU_FEATURES_VENDOR_HYGON_GENUINE "HygonGenuine" + +// See https://en.wikipedia.org/wiki/CPUID for a list of x86 cpu features. +// The field names are based on the short name provided in the wikipedia tables. +typedef struct { + int fpu : 1; + int tsc : 1; + int cx8 : 1; + int clfsh : 1; + int mmx : 1; + int aes : 1; + int erms : 1; + int f16c : 1; + int fma4 : 1; + int fma3 : 1; + int vaes : 1; + int vpclmulqdq : 1; + int bmi1 : 1; + int hle : 1; + int bmi2 : 1; + int rtm : 1; + int rdseed : 1; + int clflushopt : 1; + int clwb : 1; + + int sse : 1; + int sse2 : 1; + int sse3 : 1; + int ssse3 : 1; + int sse4_1 : 1; + int sse4_2 : 1; + int sse4a : 1; + + int avx : 1; + int avx2 : 1; + + int avx512f : 1; + int avx512cd : 1; + int avx512er : 1; + int avx512pf : 1; + int avx512bw : 1; + int avx512dq : 1; + int avx512vl : 1; + int avx512ifma : 1; + int avx512vbmi : 1; + int avx512vbmi2 : 1; + int avx512vnni : 1; + int avx512bitalg : 1; + int avx512vpopcntdq : 1; + int avx512_4vnniw : 1; + int avx512_4vbmi2 : 1; + int avx512_second_fma : 1; + int avx512_4fmaps : 1; + int avx512_bf16 : 1; + int avx512_vp2intersect : 1; + int amx_bf16 : 1; + int amx_tile : 1; + int amx_int8 : 1; + + int pclmulqdq : 1; + int smx : 1; + int sgx : 1; + int cx16 : 1; // aka. CMPXCHG16B + int sha : 1; + int popcnt : 1; + int movbe : 1; + int rdrnd : 1; + + int dca : 1; + int ss : 1; + int adx : 1; + // Make sure to update X86FeaturesEnum below if you add a field here. +} X86Features; + +typedef struct { + X86Features features; + int family; + int model; + int stepping; + char vendor[13]; // 0 terminated string +} X86Info; + +// Calls cpuid and returns an initialized X86info. +X86Info GetX86Info(void); + +// Returns cache hierarchy informations. +// Can call cpuid multiple times. +// Only works on Intel CPU at the moment. +CacheInfo GetX86CacheInfo(void); + +typedef enum { + X86_UNKNOWN, + INTEL_80486, // 80486 + INTEL_P5, // P5 + INTEL_LAKEMONT, // LAKEMONT + INTEL_CORE, // CORE + INTEL_PNR, // PENRYN + INTEL_NHM, // NEHALEM + INTEL_ATOM_BNL, // BONNELL + INTEL_WSM, // WESTMERE + INTEL_SNB, // SANDYBRIDGE + INTEL_IVB, // IVYBRIDGE + INTEL_ATOM_SMT, // SILVERMONT + INTEL_HSW, // HASWELL + INTEL_BDW, // BROADWELL + INTEL_SKL, // SKYLAKE + INTEL_ATOM_GMT, // GOLDMONT + INTEL_KBL, // KABY LAKE + INTEL_CFL, // COFFEE LAKE + INTEL_WHL, // WHISKEY LAKE + INTEL_CNL, // CANNON LAKE + INTEL_ICL, // ICE LAKE + INTEL_TGL, // TIGER LAKE + INTEL_SPR, // SAPPHIRE RAPIDS + INTEL_ADL, // ALDER LAKE + INTEL_RCL, // ROCKET LAKE + INTEL_KNIGHTS_M, // KNIGHTS MILL + INTEL_KNIGHTS_L, // KNIGHTS LANDING + INTEL_KNIGHTS_F, // KNIGHTS FERRY + INTEL_KNIGHTS_C, // KNIGHTS CORNER + INTEL_NETBURST, // NETBURST + AMD_HAMMER, // K8 HAMMER + AMD_K10, // K10 + AMD_K11, // K11 + AMD_K12, // K12 + AMD_BOBCAT, // K14 BOBCAT + AMD_PILEDRIVER, // K15 PILEDRIVER + AMD_STREAMROLLER, // K15 STREAMROLLER + AMD_EXCAVATOR, // K15 EXCAVATOR + AMD_BULLDOZER, // K15 BULLDOZER + AMD_JAGUAR, // K16 JAGUAR + AMD_PUMA, // K16 PUMA + AMD_ZEN, // K17 ZEN + AMD_ZEN_PLUS, // K17 ZEN+ + AMD_ZEN2, // K17 ZEN 2 + AMD_ZEN3, // K19 ZEN 3 + X86_MICROARCHITECTURE_LAST_, +} X86Microarchitecture; + +// Returns the underlying microarchitecture by looking at X86Info's vendor, +// family and model. +X86Microarchitecture GetX86Microarchitecture(const X86Info* info); + +// Calls cpuid and fills the brand_string. +// - brand_string *must* be of size 49 (beware of array decaying). +// - brand_string will be zero terminated. +void FillX86BrandString(char brand_string[49]); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + X86_FPU, + X86_TSC, + X86_CX8, + X86_CLFSH, + X86_MMX, + X86_AES, + X86_ERMS, + X86_F16C, + X86_FMA4, + X86_FMA3, + X86_VAES, + X86_VPCLMULQDQ, + X86_BMI1, + X86_HLE, + X86_BMI2, + X86_RTM, + X86_RDSEED, + X86_CLFLUSHOPT, + X86_CLWB, + X86_SSE, + X86_SSE2, + X86_SSE3, + X86_SSSE3, + X86_SSE4_1, + X86_SSE4_2, + X86_SSE4A, + X86_AVX, + X86_AVX2, + X86_AVX512F, + X86_AVX512CD, + X86_AVX512ER, + X86_AVX512PF, + X86_AVX512BW, + X86_AVX512DQ, + X86_AVX512VL, + X86_AVX512IFMA, + X86_AVX512VBMI, + X86_AVX512VBMI2, + X86_AVX512VNNI, + X86_AVX512BITALG, + X86_AVX512VPOPCNTDQ, + X86_AVX512_4VNNIW, + X86_AVX512_4VBMI2, + X86_AVX512_SECOND_FMA, + X86_AVX512_4FMAPS, + X86_AVX512_BF16, + X86_AVX512_VP2INTERSECT, + X86_AMX_BF16, + X86_AMX_TILE, + X86_AMX_INT8, + X86_PCLMULQDQ, + X86_SMX, + X86_SGX, + X86_CX16, + X86_SHA, + X86_POPCNT, + X86_MOVBE, + X86_RDRND, + X86_DCA, + X86_SS, + X86_ADX, + X86_LAST_, +} X86FeaturesEnum; + +int GetX86FeaturesEnumValue(const X86Features* features, X86FeaturesEnum value); + +const char* GetX86FeaturesEnumName(X86FeaturesEnum); + +const char* GetX86MicroarchitectureName(X86Microarchitecture); + +CPU_FEATURES_END_CPP_NAMESPACE + +#if !defined(CPU_FEATURES_ARCH_X86) +#error "Including cpuinfo_x86.h from a non-x86 target." +#endif + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ diff --git a/cpu_features/include/internal/bit_utils.h b/cpu_features/include/internal/bit_utils.h new file mode 100644 index 0000000..3467ff9 --- /dev/null +++ b/cpu_features/include/internal/bit_utils.h @@ -0,0 +1,40 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ + +#include +#include +#include + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +inline static bool IsBitSet(uint32_t reg, uint32_t bit) { + return (reg >> bit) & 0x1; +} + +inline static uint32_t ExtractBitRange(uint32_t reg, uint32_t msb, + uint32_t lsb) { + const uint64_t bits = msb - lsb + 1ULL; + const uint64_t mask = (1ULL << bits) - 1ULL; + assert(msb >= lsb); + return (reg >> lsb) & mask; +} + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ diff --git a/cpu_features/include/internal/cpuid_x86.h b/cpu_features/include/internal/cpuid_x86.h new file mode 100644 index 0000000..33327a4 --- /dev/null +++ b/cpu_features/include/internal/cpuid_x86.h @@ -0,0 +1,37 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ + +#include + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// A struct to hold the result of a call to cpuid. +typedef struct { + uint32_t eax, ebx, ecx, edx; +} Leaf; + +// Returns the result of a call to the cpuid instruction. +Leaf GetCpuidLeaf(uint32_t leaf_id, int ecx); + +// Returns the eax value of the XCR0 register. +uint32_t GetXCR0Eax(void); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ diff --git a/cpu_features/include/internal/filesystem.h b/cpu_features/include/internal/filesystem.h new file mode 100644 index 0000000..d8f2f6a --- /dev/null +++ b/cpu_features/include/internal/filesystem.h @@ -0,0 +1,39 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// An interface for the filesystem that allows mocking the filesystem in +// unittests. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ + +#include +#include + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// Same as linux "open(filename, O_RDONLY)", retries automatically on EINTR. +int CpuFeatures_OpenFile(const char* filename); + +// Same as linux "read(file_descriptor, buffer, buffer_size)", retries +// automatically on EINTR. +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, size_t buffer_size); + +// Same as linux "close(file_descriptor)". +void CpuFeatures_CloseFile(int file_descriptor); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ diff --git a/cpu_features/include/internal/hwcaps.h b/cpu_features/include/internal/hwcaps.h new file mode 100644 index 0000000..d7fc782 --- /dev/null +++ b/cpu_features/include/internal/hwcaps.h @@ -0,0 +1,190 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Interface to retrieve hardware capabilities. It relies on Linux's getauxval +// or `/proc/self/auxval` under the hood. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ + +#include +#include + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// To avoid depending on the linux kernel we reproduce the architecture specific +// constants here. + +// http://elixir.free-electrons.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h +#define AARCH64_HWCAP_FP (1UL << 0) +#define AARCH64_HWCAP_ASIMD (1UL << 1) +#define AARCH64_HWCAP_EVTSTRM (1UL << 2) +#define AARCH64_HWCAP_AES (1UL << 3) +#define AARCH64_HWCAP_PMULL (1UL << 4) +#define AARCH64_HWCAP_SHA1 (1UL << 5) +#define AARCH64_HWCAP_SHA2 (1UL << 6) +#define AARCH64_HWCAP_CRC32 (1UL << 7) +#define AARCH64_HWCAP_ATOMICS (1UL << 8) +#define AARCH64_HWCAP_FPHP (1UL << 9) +#define AARCH64_HWCAP_ASIMDHP (1UL << 10) +#define AARCH64_HWCAP_CPUID (1UL << 11) +#define AARCH64_HWCAP_ASIMDRDM (1UL << 12) +#define AARCH64_HWCAP_JSCVT (1UL << 13) +#define AARCH64_HWCAP_FCMA (1UL << 14) +#define AARCH64_HWCAP_LRCPC (1UL << 15) +#define AARCH64_HWCAP_DCPOP (1UL << 16) +#define AARCH64_HWCAP_SHA3 (1UL << 17) +#define AARCH64_HWCAP_SM3 (1UL << 18) +#define AARCH64_HWCAP_SM4 (1UL << 19) +#define AARCH64_HWCAP_ASIMDDP (1UL << 20) +#define AARCH64_HWCAP_SHA512 (1UL << 21) +#define AARCH64_HWCAP_SVE (1UL << 22) +#define AARCH64_HWCAP_ASIMDFHM (1UL << 23) +#define AARCH64_HWCAP_DIT (1UL << 24) +#define AARCH64_HWCAP_USCAT (1UL << 25) +#define AARCH64_HWCAP_ILRCPC (1UL << 26) +#define AARCH64_HWCAP_FLAGM (1UL << 27) +#define AARCH64_HWCAP_SSBS (1UL << 28) +#define AARCH64_HWCAP_SB (1UL << 29) +#define AARCH64_HWCAP_PACA (1UL << 30) +#define AARCH64_HWCAP_PACG (1UL << 31) + +#define AARCH64_HWCAP2_DCPODP (1UL << 0) +#define AARCH64_HWCAP2_SVE2 (1UL << 1) +#define AARCH64_HWCAP2_SVEAES (1UL << 2) +#define AARCH64_HWCAP2_SVEPMULL (1UL << 3) +#define AARCH64_HWCAP2_SVEBITPERM (1UL << 4) +#define AARCH64_HWCAP2_SVESHA3 (1UL << 5) +#define AARCH64_HWCAP2_SVESM4 (1UL << 6) +#define AARCH64_HWCAP2_FLAGM2 (1UL << 7) +#define AARCH64_HWCAP2_FRINT (1UL << 8) +#define AARCH64_HWCAP2_SVEI8MM (1UL << 9) +#define AARCH64_HWCAP2_SVEF32MM (1UL << 10) +#define AARCH64_HWCAP2_SVEF64MM (1UL << 11) +#define AARCH64_HWCAP2_SVEBF16 (1UL << 12) +#define AARCH64_HWCAP2_I8MM (1UL << 13) +#define AARCH64_HWCAP2_BF16 (1UL << 14) +#define AARCH64_HWCAP2_DGH (1UL << 15) +#define AARCH64_HWCAP2_RNG (1UL << 16) +#define AARCH64_HWCAP2_BTI (1UL << 17) +#define AARCH64_HWCAP2_MTE (1UL << 18) + +// http://elixir.free-electrons.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h +#define ARM_HWCAP_SWP (1UL << 0) +#define ARM_HWCAP_HALF (1UL << 1) +#define ARM_HWCAP_THUMB (1UL << 2) +#define ARM_HWCAP_26BIT (1UL << 3) +#define ARM_HWCAP_FAST_MULT (1UL << 4) +#define ARM_HWCAP_FPA (1UL << 5) +#define ARM_HWCAP_VFP (1UL << 6) +#define ARM_HWCAP_EDSP (1UL << 7) +#define ARM_HWCAP_JAVA (1UL << 8) +#define ARM_HWCAP_IWMMXT (1UL << 9) +#define ARM_HWCAP_CRUNCH (1UL << 10) +#define ARM_HWCAP_THUMBEE (1UL << 11) +#define ARM_HWCAP_NEON (1UL << 12) +#define ARM_HWCAP_VFPV3 (1UL << 13) +#define ARM_HWCAP_VFPV3D16 (1UL << 14) +#define ARM_HWCAP_TLS (1UL << 15) +#define ARM_HWCAP_VFPV4 (1UL << 16) +#define ARM_HWCAP_IDIVA (1UL << 17) +#define ARM_HWCAP_IDIVT (1UL << 18) +#define ARM_HWCAP_VFPD32 (1UL << 19) +#define ARM_HWCAP_LPAE (1UL << 20) +#define ARM_HWCAP_EVTSTRM (1UL << 21) +#define ARM_HWCAP2_AES (1UL << 0) +#define ARM_HWCAP2_PMULL (1UL << 1) +#define ARM_HWCAP2_SHA1 (1UL << 2) +#define ARM_HWCAP2_SHA2 (1UL << 3) +#define ARM_HWCAP2_CRC32 (1UL << 4) + +// http://elixir.free-electrons.com/linux/latest/source/arch/mips/include/uapi/asm/hwcap.h +#define MIPS_HWCAP_R6 (1UL << 0) +#define MIPS_HWCAP_MSA (1UL << 1) +#define MIPS_HWCAP_CRC32 (1UL << 2) + +// http://elixir.free-electrons.com/linux/latest/source/arch/powerpc/include/uapi/asm/cputable.h +#ifndef _UAPI__ASM_POWERPC_CPUTABLE_H +/* in AT_HWCAP */ +#define PPC_FEATURE_32 0x80000000 +#define PPC_FEATURE_64 0x40000000 +#define PPC_FEATURE_601_INSTR 0x20000000 +#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 +#define PPC_FEATURE_HAS_FPU 0x08000000 +#define PPC_FEATURE_HAS_MMU 0x04000000 +#define PPC_FEATURE_HAS_4xxMAC 0x02000000 +#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 +#define PPC_FEATURE_HAS_SPE 0x00800000 +#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 +#define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 +#define PPC_FEATURE_POWER4 0x00080000 +#define PPC_FEATURE_POWER5 0x00040000 +#define PPC_FEATURE_POWER5_PLUS 0x00020000 +#define PPC_FEATURE_CELL 0x00010000 +#define PPC_FEATURE_BOOKE 0x00008000 +#define PPC_FEATURE_SMT 0x00004000 +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 +#define PPC_FEATURE_ARCH_2_05 0x00001000 +#define PPC_FEATURE_PA6T 0x00000800 +#define PPC_FEATURE_HAS_DFP 0x00000400 +#define PPC_FEATURE_POWER6_EXT 0x00000200 +#define PPC_FEATURE_ARCH_2_06 0x00000100 +#define PPC_FEATURE_HAS_VSX 0x00000080 + +#define PPC_FEATURE_PSERIES_PERFMON_COMPAT 0x00000040 + +/* Reserved - do not use 0x00000004 */ +#define PPC_FEATURE_TRUE_LE 0x00000002 +#define PPC_FEATURE_PPC_LE 0x00000001 + +/* in AT_HWCAP2 */ +#define PPC_FEATURE2_ARCH_2_07 0x80000000 +#define PPC_FEATURE2_HTM 0x40000000 +#define PPC_FEATURE2_DSCR 0x20000000 +#define PPC_FEATURE2_EBB 0x10000000 +#define PPC_FEATURE2_ISEL 0x08000000 +#define PPC_FEATURE2_TAR 0x04000000 +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#define PPC_FEATURE2_HTM_NOSC 0x01000000 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 +#define PPC_FEATURE2_DARN 0x00200000 +#define PPC_FEATURE2_SCV 0x00100000 +#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 +#endif + +typedef struct { + unsigned long hwcaps; + unsigned long hwcaps2; +} HardwareCapabilities; + +// Retrieves values from auxiliary vector for types AT_HWCAP and AT_HWCAP2. +// First tries to call getauxval(), if not available falls back to reading +// "/proc/self/auxv". +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void); + +// Checks whether value for AT_HWCAP (or AT_HWCAP2) match hwcaps_mask. +bool CpuFeatures_IsHwCapsSet(const HardwareCapabilities hwcaps_mask, + const HardwareCapabilities hwcaps); + +// Get pointer for the AT_PLATFORM type. +const char* CpuFeatures_GetPlatformPointer(void); +// Get pointer for the AT_BASE_PLATFORM type. +const char* CpuFeatures_GetBasePlatformPointer(void); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ diff --git a/cpu_features/include/internal/stack_line_reader.h b/cpu_features/include/internal/stack_line_reader.h new file mode 100644 index 0000000..39c1b8b --- /dev/null +++ b/cpu_features/include/internal/stack_line_reader.h @@ -0,0 +1,49 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Reads a file line by line and stores the data on the stack. This allows +// parsing files in one go without allocating. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ + +#include + +#include "cpu_features_macros.h" +#include "internal/string_view.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + char buffer[STACK_LINE_READER_BUFFER_SIZE]; + StringView view; + int fd; + bool skip_mode; +} StackLineReader; + +// Initializes a StackLineReader. +void StackLineReader_Initialize(StackLineReader* reader, int fd); + +typedef struct { + StringView line; // A view of the line. + bool eof; // Nothing more to read, we reached EOF. + bool full_line; // If false the line was truncated to + // STACK_LINE_READER_BUFFER_SIZE. +} LineResult; + +// Reads the file pointed to by fd and tries to read a full line. +LineResult StackLineReader_NextLine(StackLineReader* reader); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ diff --git a/cpu_features/include/internal/string_view.h b/cpu_features/include/internal/string_view.h new file mode 100644 index 0000000..a109d45 --- /dev/null +++ b/cpu_features/include/internal/string_view.h @@ -0,0 +1,110 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A view over a piece of string. The view is not 0 terminated. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ + +#include +#include +#include + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + const char* ptr; + size_t size; +} StringView; + +#ifdef __cplusplus +static const StringView kEmptyStringView = {NULL, 0}; +#else +static const StringView kEmptyStringView; +#endif + +// Returns a StringView from the provided string. +// Passing NULL is valid only if size is 0. +static inline StringView view(const char* str, const size_t size) { + StringView view; + view.ptr = str; + view.size = size; + return view; +} + +static inline StringView str(const char* str) { return view(str, strlen(str)); } + +// Returns the index of the first occurrence of c in view or -1 if not found. +int CpuFeatures_StringView_IndexOfChar(const StringView view, char c); + +// Returns the index of the first occurrence of sub_view in view or -1 if not +// found. +int CpuFeatures_StringView_IndexOf(const StringView view, + const StringView sub_view); + +// Returns whether a is equal to b (same content). +bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b); + +// Returns whether a starts with b. +bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b); + +// Removes count characters from the beginning of view or kEmptyStringView if +// count if greater than view.size. +StringView CpuFeatures_StringView_PopFront(const StringView str_view, + size_t count); + +// Removes count characters from the end of view or kEmptyStringView if count if +// greater than view.size. +StringView CpuFeatures_StringView_PopBack(const StringView str_view, + size_t count); + +// Keeps the count first characters of view or view if count if greater than +// view.size. +StringView CpuFeatures_StringView_KeepFront(const StringView str_view, + size_t count); + +// Retrieves the first character of view. If view is empty the behavior is +// undefined. +char CpuFeatures_StringView_Front(const StringView view); + +// Retrieves the last character of view. If view is empty the behavior is +// undefined. +char CpuFeatures_StringView_Back(const StringView view); + +// Removes leading and tailing space characters. +StringView CpuFeatures_StringView_TrimWhitespace(StringView view); + +// Convert StringView to positive integer. e.g. "42", "0x2a". +// Returns -1 on error. +int CpuFeatures_StringView_ParsePositiveNumber(const StringView view); + +// Copies src StringView to dst buffer. +void CpuFeatures_StringView_CopyString(const StringView src, char* dst, + size_t dst_size); + +// Checks if line contains the specified whitespace separated word. +bool CpuFeatures_StringView_HasWord(const StringView line, + const char* const word, + const char separator); + +// Get key/value from line. key and value are separated by ": ". +// key and value are cleaned up from leading and trailing whitespaces. +bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line, + StringView* key, + StringView* value); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ diff --git a/cpu_features/ndk_compat/CMakeLists.txt b/cpu_features/ndk_compat/CMakeLists.txt new file mode 100644 index 0000000..37b3866 --- /dev/null +++ b/cpu_features/ndk_compat/CMakeLists.txt @@ -0,0 +1,60 @@ + +# +# library : NDK compat +# +find_package(Threads REQUIRED) +set (NDK_COMPAT_HDRS cpu-features.h) +set (NDK_COMPAT_SRCS + cpu-features.c + $ + $ +) +# Note that following `add_cpu_features_headers_and_sources` will use +# NDK_COMPAT_SRCS in lieu of NDK_COMPAT_HDRS because we don't want cpu_features +# headers to be installed alongside ndk_compat. +add_cpu_features_headers_and_sources(NDK_COMPAT_SRCS NDK_COMPAT_SRCS) +add_library(ndk_compat ${NDK_COMPAT_HDRS} ${NDK_COMPAT_SRCS}) +setup_include_and_definitions(ndk_compat) +target_include_directories(ndk_compat PUBLIC $) +target_link_libraries(ndk_compat PUBLIC ${CMAKE_DL_LIBS} ${CMAKE_THREAD_LIBS_INIT}) +set_target_properties(ndk_compat PROPERTIES PUBLIC_HEADER "${NDK_COMPAT_HDRS}") + +include(GNUInstallDirs) +install(TARGETS ndk_compat + EXPORT CpuFeaturesNdkCompatTargets + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ndk_compat + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) +install(EXPORT CpuFeaturesNdkCompatTargets + NAMESPACE CpuFeatures:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeaturesNdkCompat + COMPONENT Devel +) +include(CMakePackageConfigHelpers) +configure_package_config_file(${PROJECT_SOURCE_DIR}/cmake/CpuFeaturesNdkCompatConfig.cmake.in + "${PROJECT_BINARY_DIR}/CpuFeaturesNdkCompatConfig.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeaturesNdkCompat" + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO +) +write_basic_package_version_file( + "${PROJECT_BINARY_DIR}/CpuFeaturesNdkCompatConfigVersion.cmake" + COMPATIBILITY SameMajorVersion +) +install( + FILES + "${PROJECT_BINARY_DIR}/CpuFeaturesNdkCompatConfig.cmake" + "${PROJECT_BINARY_DIR}/CpuFeaturesNdkCompatConfigVersion.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeaturesNdkCompat" + COMPONENT Devel +) + +# +# program : NDK compat test program +# +if(BUILD_TESTING) + add_executable(ndk-compat-test ndk-compat-test.c) + target_link_libraries(ndk-compat-test PRIVATE ndk_compat) +endif() diff --git a/cpu_features/ndk_compat/README.md b/cpu_features/ndk_compat/README.md new file mode 100644 index 0000000..38c8393 --- /dev/null +++ b/cpu_features/ndk_compat/README.md @@ -0,0 +1,4 @@ +Provides a header compatible with [android's NDK cpu-features.h](https://android.googlesource.com/platform/ndk/+/master/sources/android/cpufeatures/cpu-features.h). + +It is intended to be a drop in replacement for this header and help users +transition from the NDK to [Google's cpu_features library](https://github.com/google/cpu_features). diff --git a/cpu_features/ndk_compat/cpu-features.c b/cpu_features/ndk_compat/cpu-features.c new file mode 100644 index 0000000..27ff7bb --- /dev/null +++ b/cpu_features/ndk_compat/cpu-features.c @@ -0,0 +1,205 @@ +#include "cpu-features.h" + +#include + +#include "cpu_features_macros.h" +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +#if defined(CPU_FEATURES_ARCH_ARM) +#include "cpuinfo_arm.h" +#elif defined(CPU_FEATURES_ARCH_X86) +#include "cpuinfo_x86.h" +#elif defined(CPU_FEATURES_ARCH_MIPS) +#include "cpuinfo_mips.h" +#elif defined(CPU_FEATURES_ARCH_AARCH64) +#include "cpuinfo_aarch64.h" +#endif + +static pthread_once_t g_once; +static int g_inited; +static uint64_t g_cpuFeatures; +static int g_cpuCount; + +#ifdef CPU_FEATURES_ARCH_ARM +static uint32_t g_cpuIdArm; +#endif + +static void set_cpu_mask_bit(uint32_t index, uint32_t* cpu_mask) { + *cpu_mask |= 1UL << index; +} + +// Examples of valid inputs: "31", "4-31" +static void parse_cpu_mask(const StringView text, uint32_t* cpu_mask) { + int separator_index = CpuFeatures_StringView_IndexOfChar(text, '-'); + if (separator_index < 0) { // A single cpu index + int cpu_index = CpuFeatures_StringView_ParsePositiveNumber(text); + if (cpu_index < 0) return; + set_cpu_mask_bit(cpu_index, cpu_mask); + } else { + int cpu_index_a = CpuFeatures_StringView_ParsePositiveNumber( + CpuFeatures_StringView_KeepFront(text, separator_index)); + int cpu_index_b = CpuFeatures_StringView_ParsePositiveNumber( + CpuFeatures_StringView_PopFront(text, separator_index + 1)); + int i; + if (cpu_index_a < 0 || cpu_index_b < 0) return; + for (i = cpu_index_a; i <= cpu_index_b; ++i) { + if (i < 32) { + set_cpu_mask_bit(i, cpu_mask); + } + } + } +} + +// Format specification from +// https://www.kernel.org/doc/Documentation/cputopology.txt +// Examples of valid inputs: "31", "2,4-31,32-63", "0-1,3" +static void parse_cpu_mask_line(const LineResult result, uint32_t* cpu_mask) { + if (!result.full_line || result.eof) return; + StringView line = result.line; + for (; line.size > 0;) { + int next_entry_index = CpuFeatures_StringView_IndexOfChar(line, ','); + if (next_entry_index < 0) { + parse_cpu_mask(line, cpu_mask); + break; + } + StringView entry = CpuFeatures_StringView_KeepFront(line, next_entry_index); + parse_cpu_mask(entry, cpu_mask); + line = CpuFeatures_StringView_PopFront(line, next_entry_index + 1); + } +} + +static void update_cpu_mask_from_file(const char* filename, + uint32_t* cpu_mask) { + const int fd = CpuFeatures_OpenFile(filename); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + parse_cpu_mask_line(StackLineReader_NextLine(&reader), cpu_mask); + CpuFeatures_CloseFile(fd); + } +} + +static int get_cpu_count(void) { + uint32_t cpu_mask = 0; + update_cpu_mask_from_file("/sys/devices/system/cpu/present", &cpu_mask); + update_cpu_mask_from_file("/sys/devices/system/cpu/possible", &cpu_mask); + return __builtin_popcount(cpu_mask); +} + +static void android_cpuInit(void) { + g_cpuFeatures = 0; + g_cpuCount = 1; + g_inited = 1; + + g_cpuCount = get_cpu_count(); + if (g_cpuCount == 0) { + g_cpuCount = 1; + } +#if defined(CPU_FEATURES_ARCH_ARM) + ArmInfo info = GetArmInfo(); + if (info.architecture == 7) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; + if (info.features.vfpv3) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; + if (info.features.neon) { + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON; + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFP_D32; + } + if (info.features.vfpv3d16) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFP_FP16; + if (info.features.idiva) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; + if (info.features.idivt) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; + if (info.features.iwmmxt) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; + if (info.features.aes) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_AES; + if (info.features.pmull) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_PMULL; + if (info.features.sha1) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA1; + if (info.features.sha2) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA2; + if (info.features.crc32) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_CRC32; + if (info.architecture >= 6) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; + if (info.features.vfp) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; + if (info.features.vfpv4) { + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFP_FMA; + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; + } + g_cpuIdArm = GetArmCpuId(&info); +#elif defined(CPU_FEATURES_ARCH_X86) + X86Info info = GetX86Info(); + if (info.features.ssse3) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; + if (info.features.popcnt) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; + if (info.features.movbe) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; + if (info.features.sse4_1) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_1; + if (info.features.sse4_2) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_2; + if (info.features.aes) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AES_NI; + if (info.features.avx) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX; + if (info.features.rdrnd) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_RDRAND; + if (info.features.avx2) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX2; + if (info.features.sha) g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SHA_NI; +#elif defined(CPU_FEATURES_ARCH_MIPS) + MipsInfo info = GetMipsInfo(); + if (info.features.r6) g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_R6; + if (info.features.msa) g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_MSA; +#elif defined(CPU_FEATURES_ARCH_AARCH64) + Aarch64Info info = GetAarch64Info(); + if (info.features.fp) g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_FP; + if (info.features.asimd) g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_ASIMD; + if (info.features.aes) g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_AES; + if (info.features.pmull) g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_PMULL; + if (info.features.sha1) g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA1; + if (info.features.sha2) g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA2; + if (info.features.crc32) g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_CRC32; +#endif +} + +AndroidCpuFamily android_getCpuFamily(void) { +#if defined(CPU_FEATURES_ARCH_ARM) + return ANDROID_CPU_FAMILY_ARM; +#elif defined(CPU_FEATURES_ARCH_X86_32) + return ANDROID_CPU_FAMILY_X86; +#elif defined(CPU_FEATURES_ARCH_MIPS64) + return ANDROID_CPU_FAMILY_MIPS64; +#elif defined(CPU_FEATURES_ARCH_MIPS32) + return ANDROID_CPU_FAMILY_MIPS; +#elif defined(CPU_FEATURES_ARCH_AARCH64) + return ANDROID_CPU_FAMILY_ARM64; +#elif defined(CPU_FEATURES_ARCH_X86_64) + return ANDROID_CPU_FAMILY_X86_64; +#else + return ANDROID_CPU_FAMILY_UNKNOWN; +#endif +} + +uint64_t android_getCpuFeatures(void) { + pthread_once(&g_once, android_cpuInit); + return g_cpuFeatures; +} + +int android_getCpuCount(void) { + pthread_once(&g_once, android_cpuInit); + return g_cpuCount; +} + +static void android_cpuInitDummy(void) { g_inited = 1; } + +int android_setCpu(int cpu_count, uint64_t cpu_features) { + /* Fail if the library was already initialized. */ + if (g_inited) return 0; + g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count); + g_cpuFeatures = cpu_features; + pthread_once(&g_once, android_cpuInitDummy); + return 1; +} + +#ifdef CPU_FEATURES_ARCH_ARM + +uint32_t android_getCpuIdArm(void) { + pthread_once(&g_once, android_cpuInit); + return g_cpuIdArm; +} + +int android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id) { + if (!android_setCpu(cpu_count, cpu_features)) return 0; + g_cpuIdArm = cpu_id; + return 1; +} + +#endif // CPU_FEATURES_ARCH_ARM diff --git a/cpu_features/ndk_compat/cpu-features.h b/cpu_features/ndk_compat/cpu-features.h new file mode 100644 index 0000000..51bea53 --- /dev/null +++ b/cpu_features/ndk_compat/cpu-features.h @@ -0,0 +1,320 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef GOOGLE_CPU_FEATURES_H +#define GOOGLE_CPU_FEATURES_H +#include +#include + +__BEGIN_DECLS + +/* A list of valid values returned by android_getCpuFamily(). + * They describe the CPU Architecture of the current process. + */ +typedef enum { + ANDROID_CPU_FAMILY_UNKNOWN = 0, + ANDROID_CPU_FAMILY_ARM, + ANDROID_CPU_FAMILY_X86, + ANDROID_CPU_FAMILY_MIPS, + ANDROID_CPU_FAMILY_ARM64, + ANDROID_CPU_FAMILY_X86_64, + ANDROID_CPU_FAMILY_MIPS64, + ANDROID_CPU_FAMILY_MAX /* do not remove */ +} AndroidCpuFamily; + +/* Return the CPU family of the current process. + * + * Note that this matches the bitness of the current process. I.e. when + * running a 32-bit binary on a 64-bit capable CPU, this will return the + * 32-bit CPU family value. + */ +extern AndroidCpuFamily android_getCpuFamily(void); + +/* Return a bitmap describing a set of optional CPU features that are + * supported by the current device's CPU. The exact bit-flags returned + * depend on the value returned by android_getCpuFamily(). See the + * documentation for the ANDROID_CPU_*_FEATURE_* flags below for details. + */ +extern uint64_t android_getCpuFeatures(void); + +/* The list of feature flags for ANDROID_CPU_FAMILY_ARM that can be + * recognized by the library (see note below for 64-bit ARM). Value details + * are: + * + * VFPv2: + * CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs + * support these instructions. VFPv2 is a subset of VFPv3 so this will + * be set whenever VFPv3 is set too. + * + * ARMv7: + * CPU supports the ARMv7-A basic instruction set. + * This feature is mandated by the 'armeabi-v7a' ABI. + * + * VFPv3: + * CPU supports the VFPv3-D16 instruction set, providing hardware FPU + * support for single and double precision floating point registers. + * Note that only 16 FPU registers are available by default, unless + * the D32 bit is set too. This feature is also mandated by the + * 'armeabi-v7a' ABI. + * + * VFP_D32: + * CPU VFP optional extension that provides 32 FPU registers, + * instead of 16. Note that ARM mandates this feature is the 'NEON' + * feature is implemented by the CPU. + * + * NEON: + * CPU FPU supports "ARM Advanced SIMD" instructions, also known as + * NEON. Note that this mandates the VFP_D32 feature as well, per the + * ARM Architecture specification. + * + * VFP_FP16: + * Half-width floating precision VFP extension. If set, the CPU + * supports instructions to perform floating-point operations on + * 16-bit registers. This is part of the VFPv4 specification, but + * not mandated by any Android ABI. + * + * VFP_FMA: + * Fused multiply-accumulate VFP instructions extension. Also part of + * the VFPv4 specification, but not mandated by any Android ABI. + * + * NEON_FMA: + * Fused multiply-accumulate NEON instructions extension. Optional + * extension from the VFPv4 specification, but not mandated by any + * Android ABI. + * + * IDIV_ARM: + * Integer division available in ARM mode. Only available + * on recent CPUs (e.g. Cortex-A15). + * + * IDIV_THUMB2: + * Integer division available in Thumb-2 mode. Only available + * on recent CPUs (e.g. Cortex-A15). + * + * iWMMXt: + * Optional extension that adds MMX registers and operations to an + * ARM CPU. This is only available on a few XScale-based CPU designs + * sold by Marvell. Pretty rare in practice. + * + * AES: + * CPU supports AES instructions. These instructions are only + * available for 32-bit applications running on ARMv8 CPU. + * + * CRC32: + * CPU supports CRC32 instructions. These instructions are only + * available for 32-bit applications running on ARMv8 CPU. + * + * SHA2: + * CPU supports SHA2 instructions. These instructions are only + * available for 32-bit applications running on ARMv8 CPU. + * + * SHA1: + * CPU supports SHA1 instructions. These instructions are only + * available for 32-bit applications running on ARMv8 CPU. + * + * PMULL: + * CPU supports 64-bit PMULL and PMULL2 instructions. These + * instructions are only available for 32-bit applications + * running on ARMv8 CPU. + * + * If you want to tell the compiler to generate code that targets one of + * the feature set above, you should probably use one of the following + * flags (for more details, see technical note at the end of this file): + * + * -mfpu=vfp + * -mfpu=vfpv2 + * These are equivalent and tell GCC to use VFPv2 instructions for + * floating-point operations. Use this if you want your code to + * run on *some* ARMv6 devices, and any ARMv7-A device supported + * by Android. + * + * Generated code requires VFPv2 feature. + * + * -mfpu=vfpv3-d16 + * Tell GCC to use VFPv3 instructions (using only 16 FPU registers). + * This should be generic code that runs on any CPU that supports the + * 'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this. + * + * Generated code requires VFPv3 feature. + * + * -mfpu=vfpv3 + * Tell GCC to use VFPv3 instructions with 32 FPU registers. + * Generated code requires VFPv3|VFP_D32 features. + * + * -mfpu=neon + * Tell GCC to use VFPv3 instructions with 32 FPU registers, and + * also support NEON intrinsics (see ). + * Generated code requires VFPv3|VFP_D32|NEON features. + * + * -mfpu=vfpv4-d16 + * Generated code requires VFPv3|VFP_FP16|VFP_FMA features. + * + * -mfpu=vfpv4 + * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features. + * + * -mfpu=neon-vfpv4 + * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA + * features. + * + * -mcpu=cortex-a7 + * -mcpu=cortex-a15 + * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32| + * NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2 + * This flag implies -mfpu=neon-vfpv4. + * + * -mcpu=iwmmxt + * Allows the use of iWMMXt instrinsics with GCC. + * + * IMPORTANT NOTE: These flags should only be tested when + * android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM, i.e. this is a + * 32-bit process. + * + * When running a 64-bit ARM process on an ARMv8 CPU, + * android_getCpuFeatures() will return a different set of bitflags + */ +enum { + ANDROID_CPU_ARM_FEATURE_ARMv7 = (1 << 0), + ANDROID_CPU_ARM_FEATURE_VFPv3 = (1 << 1), + ANDROID_CPU_ARM_FEATURE_NEON = (1 << 2), + ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3), + ANDROID_CPU_ARM_FEATURE_VFPv2 = (1 << 4), + ANDROID_CPU_ARM_FEATURE_VFP_D32 = (1 << 5), + ANDROID_CPU_ARM_FEATURE_VFP_FP16 = (1 << 6), + ANDROID_CPU_ARM_FEATURE_VFP_FMA = (1 << 7), + ANDROID_CPU_ARM_FEATURE_NEON_FMA = (1 << 8), + ANDROID_CPU_ARM_FEATURE_IDIV_ARM = (1 << 9), + ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10), + ANDROID_CPU_ARM_FEATURE_iWMMXt = (1 << 11), + ANDROID_CPU_ARM_FEATURE_AES = (1 << 12), + ANDROID_CPU_ARM_FEATURE_PMULL = (1 << 13), + ANDROID_CPU_ARM_FEATURE_SHA1 = (1 << 14), + ANDROID_CPU_ARM_FEATURE_SHA2 = (1 << 15), + ANDROID_CPU_ARM_FEATURE_CRC32 = (1 << 16), +}; + +/* The bit flags corresponding to the output of android_getCpuFeatures() + * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM64. Value details + * are: + * + * FP: + * CPU has Floating-point unit. + * + * ASIMD: + * CPU has Advanced SIMD unit. + * + * AES: + * CPU supports AES instructions. + * + * CRC32: + * CPU supports CRC32 instructions. + * + * SHA2: + * CPU supports SHA2 instructions. + * + * SHA1: + * CPU supports SHA1 instructions. + * + * PMULL: + * CPU supports 64-bit PMULL and PMULL2 instructions. + */ +enum { + ANDROID_CPU_ARM64_FEATURE_FP = (1 << 0), + ANDROID_CPU_ARM64_FEATURE_ASIMD = (1 << 1), + ANDROID_CPU_ARM64_FEATURE_AES = (1 << 2), + ANDROID_CPU_ARM64_FEATURE_PMULL = (1 << 3), + ANDROID_CPU_ARM64_FEATURE_SHA1 = (1 << 4), + ANDROID_CPU_ARM64_FEATURE_SHA2 = (1 << 5), + ANDROID_CPU_ARM64_FEATURE_CRC32 = (1 << 6), +}; + +/* The bit flags corresponding to the output of android_getCpuFeatures() + * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_X86 or + * ANDROID_CPU_FAMILY_X86_64. + */ +enum { + ANDROID_CPU_X86_FEATURE_SSSE3 = (1 << 0), + ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1), + ANDROID_CPU_X86_FEATURE_MOVBE = (1 << 2), + ANDROID_CPU_X86_FEATURE_SSE4_1 = (1 << 3), + ANDROID_CPU_X86_FEATURE_SSE4_2 = (1 << 4), + ANDROID_CPU_X86_FEATURE_AES_NI = (1 << 5), + ANDROID_CPU_X86_FEATURE_AVX = (1 << 6), + ANDROID_CPU_X86_FEATURE_RDRAND = (1 << 7), + ANDROID_CPU_X86_FEATURE_AVX2 = (1 << 8), + ANDROID_CPU_X86_FEATURE_SHA_NI = (1 << 9), +}; + +/* The bit flags corresponding to the output of android_getCpuFeatures() + * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_MIPS + * or ANDROID_CPU_FAMILY_MIPS64. Values are: + * + * R6: + * CPU executes MIPS Release 6 instructions natively, and + * supports obsoleted R1..R5 instructions only via kernel traps. + * + * MSA: + * CPU supports Mips SIMD Architecture instructions. + */ +enum { + ANDROID_CPU_MIPS_FEATURE_R6 = (1 << 0), + ANDROID_CPU_MIPS_FEATURE_MSA = (1 << 1), +}; + +/* Return the number of CPU cores detected on this device. + * Please note the current implementation supports up to 32 cpus. + */ +extern int android_getCpuCount(void); + +/* The following is used to force the CPU count and features + * mask in sandboxed processes. Under 4.1 and higher, these processes + * cannot access /proc, which is the only way to get information from + * the kernel about the current hardware (at least on ARM). + * + * It _must_ be called only once, and before any android_getCpuXXX + * function, any other case will fail. + * + * This function return 1 on success, and 0 on failure. + */ +extern int android_setCpu(int cpu_count, uint64_t cpu_features); + +#ifdef __arm__ + +/* Retrieve the ARM 32-bit CPUID value from the kernel. + * Note that this cannot work on sandboxed processes under 4.1 and + * higher, unless you called android_setCpuArm() before. + */ +extern uint32_t android_getCpuIdArm(void); + +/* An ARM-specific variant of android_setCpu() that also allows you + * to set the ARM CPUID field. + */ +extern int android_setCpuArm(int cpu_count, uint64_t cpu_features, + uint32_t cpu_id); + +#endif + +__END_DECLS +#endif /* GOOGLE_CPU_FEATURES_H */ diff --git a/cpu_features/ndk_compat/ndk-compat-test.c b/cpu_features/ndk_compat/ndk-compat-test.c new file mode 100644 index 0000000..e4005d4 --- /dev/null +++ b/cpu_features/ndk_compat/ndk-compat-test.c @@ -0,0 +1,12 @@ +#include + +#include "cpu-features.h" + +int main() { + printf("android_getCpuFamily()=%d\n", android_getCpuFamily()); + printf("android_getCpuFeatures()=0x%08llx\n", android_getCpuFeatures()); + printf("android_getCpuCount()=%d\n", android_getCpuCount()); +#ifdef __arm__ + printf("android_getCpuIdArm()=0x%04x\n", android_getCpuIdArm()); +#endif //__arm__ +} diff --git a/cpu_features/scripts/make_release.sh b/cpu_features/scripts/make_release.sh new file mode 100755 index 0000000..72a2a27 --- /dev/null +++ b/cpu_features/scripts/make_release.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +set -e # Fail on error +set -u # Treat unset variables as an error and exit immediately + +ACTION='\033[1;90m' +FINISHED='\033[1;96m' +NOCOLOR='\033[0m' +ERROR='\033[0;31m' + +echo -e "${ACTION}Checking environnement${NOCOLOR}" +if [[ ! $1 =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] +then + echo -e "${ERROR}Invalid version number. Aborting. ${NOCOLOR}" + exit 1 +fi + +declare -r VERSION=$1 +declare -r GIT_TAG="v$1" + +BRANCH=$(git rev-parse --abbrev-ref HEAD) +if [[ "${BRANCH}" != "master" ]] +then + echo -e "${ERROR}Not on master. Aborting. ${NOCOLOR}" + echo + exit 1 +fi + +git fetch +HEADHASH=$(git rev-parse HEAD) +UPSTREAMHASH=$(git rev-parse master@{upstream}) + +if [[ "${HEADHASH}" != "${UPSTREAMHASH}" ]] +then + echo -e "${ERROR}Not up to date with origin. Aborting.${NOCOLOR}" + echo + exit 1 +fi + +git update-index -q --refresh +if ! git diff-index --quiet HEAD -- +then + echo -e "${ERROR}Branch has uncommited changes. Aborting.${NOCOLOR}" + exit 1 +fi + +if [ ! -z "$(git ls-files --exclude-standard --others)" ] +then + echo -e "${ERROR}Branch has untracked files. Aborting.${NOCOLOR}" + exit 1 +fi + +declare -r LATEST_GIT_TAG=$(git describe --tags) +declare -r LATEST_VERSION=${LATEST_GIT_TAG#"v"} + +if ! dpkg --compare-versions "${VERSION}" "gt" "${LATEST_VERSION}" +then + echo -e "${ERROR}Invalid version ${VERSION} <= ${LATEST_VERSION} (latest). Aborting.${NOCOLOR}" + exit 1 +fi + +echo -e "${ACTION}Modifying CMakeLists.txt${NOCOLOR}" +sed -i "s/CpuFeatures VERSION ${LATEST_VERSION}/CpuFeatures VERSION ${VERSION}/g" CMakeLists.txt + +echo -e "${ACTION}Commit new revision${NOCOLOR}" +git add CMakeLists.txt +git commit -m"Release ${GIT_TAG}" + +echo -e "${ACTION}Create new tag${NOCOLOR}" +git tag ${GIT_TAG} + +echo -e "${FINISHED}Local release is ready. Run `git push origin --tags`${NOCOLOR}" diff --git a/cpu_features/scripts/run_integration.sh b/cpu_features/scripts/run_integration.sh new file mode 100755 index 0000000..645cb6a --- /dev/null +++ b/cpu_features/scripts/run_integration.sh @@ -0,0 +1,363 @@ +#!/usr/bin/env bash +set -eo pipefail + +function extract() { + echo "Extracting ${1}..." + case $1 in + *.tar.bz2) tar xjf "$1" ;; + *.tar.xz) tar xJf "$1" ;; + *.tar.gz) tar xzf "$1" ;; + *) + >&2 echo "don't know how to extract '$1'..." + exit 1 + esac +} + +function unpack() { + mkdir -p "${ARCHIVE_DIR}" + cd "${ARCHIVE_DIR}" || exit 2 + local -r URL=$1 + local -r RELATIVE_DIR=$2 + local -r DESTINATION="${ARCHIVE_DIR}/${RELATIVE_DIR}" + if [[ ! -d "${DESTINATION}" ]] ; then + echo "Downloading ${URL}..." + local -r ARCHIVE_NAME=$(basename "${URL}") + test -f "${ARCHIVE_NAME}" || wget --no-verbose "${URL}" + extract "${ARCHIVE_NAME}" + rm -f "${ARCHIVE_NAME}" + fi +} + +function install_qemu() { + if [[ "${QEMU_ARCH}" == "DISABLED" ]]; then + >&2 echo 'QEMU is disabled !' + return 0 + fi + local -r QEMU_VERSION=${QEMU_VERSION:=5.2.0} + local -r QEMU_TARGET=${QEMU_ARCH}-linux-user + + if echo "${QEMU_VERSION} ${QEMU_TARGET}" | cmp --silent "${QEMU_INSTALL}/.build" -; then + echo "qemu ${QEMU_VERSION} up to date!" + return 0 + fi + + echo "QEMU_VERSION: ${QEMU_VERSION}" + echo "QEMU_TARGET: ${QEMU_TARGET}" + + rm -rf "${QEMU_INSTALL}" + + # Checking for a tarball before downloading makes testing easier :-) + local -r QEMU_URL="http://wiki.qemu-project.org/download/qemu-${QEMU_VERSION}.tar.xz" + local -r QEMU_DIR="qemu-${QEMU_VERSION}" + unpack ${QEMU_URL} ${QEMU_DIR} + cd ${QEMU_DIR} || exit 2 + + # Qemu (meson based build) depends on: pkgconf, libglib2.0, python3, ninja + ./configure \ + --prefix="${QEMU_INSTALL}" \ + --target-list="${QEMU_TARGET}" \ + --audio-drv-list= \ + --disable-brlapi \ + --disable-curl \ + --disable-curses \ + --disable-docs \ + --disable-gcrypt \ + --disable-gnutls \ + --disable-gtk \ + --disable-libnfs \ + --disable-libssh \ + --disable-nettle \ + --disable-opengl \ + --disable-sdl \ + --disable-virglrenderer \ + --disable-vte \ + --enable-modules + + # --static Not supported on Archlinux + # so we use --enable-modules + + # wrapper on ninja + make -j8 + make install + + echo "$QEMU_VERSION $QEMU_TARGET" > "${QEMU_INSTALL}/.build" +} + +function assert_defined(){ + if [[ -z "${!1}" ]]; then + >&2 echo "Variable '${1}' must be defined" + exit 1 + fi +} + +function clean_build() { + # Cleanup previous build + rm -rf "${BUILD_DIR}" + mkdir -p "${BUILD_DIR}" +} + +function expand_linaro_config() { + #ref: https://releases.linaro.org/components/toolchain/binaries/ + local -r LINARO_VERSION=7.5-2019.12 + local -r LINARO_ROOT_URL=https://releases.linaro.org/components/toolchain/binaries/${LINARO_VERSION} + + local -r GCC_VERSION=7.5.0-2019.12 + local -r GCC_URL=${LINARO_ROOT_URL}/${TARGET}/gcc-linaro-${GCC_VERSION}-x86_64_${TARGET}.tar.xz + local -r GCC_RELATIVE_DIR="gcc-linaro-${GCC_VERSION}-x86_64_${TARGET}" + unpack "${GCC_URL}" "${GCC_RELATIVE_DIR}" + + local -r SYSROOT_VERSION=2.25-2019.12 + local -r SYSROOT_URL=${LINARO_ROOT_URL}/${TARGET}/sysroot-glibc-linaro-${SYSROOT_VERSION}-${TARGET}.tar.xz + local -r SYSROOT_RELATIVE_DIR=sysroot-glibc-linaro-${SYSROOT_VERSION}-${TARGET} + unpack "${SYSROOT_URL}" "${SYSROOT_RELATIVE_DIR}" + + local -r SYSROOT_DIR=${ARCHIVE_DIR}/${SYSROOT_RELATIVE_DIR} + local -r STAGING_DIR=${ARCHIVE_DIR}/${SYSROOT_RELATIVE_DIR}-stage + local -r GCC_DIR=${ARCHIVE_DIR}/${GCC_RELATIVE_DIR} + + # Write a Toolchain file + # note: This is manadatory to use a file in order to have the CMake variable + # 'CMAKE_CROSSCOMPILING' set to TRUE. + # ref: https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html#cross-compiling-for-linux + cat >"$TOOLCHAIN_FILE" <&2 echo 'unknown mips platform' + exit 1 ;; + esac + local -r SYSROOT_DIR=${GCC_DIR}/sysroot + local -r STAGING_DIR=${SYSROOT_DIR}-stage + + # Write a Toolchain file + # note: This is manadatory to use a file in order to have the CMake variable + # 'CMAKE_CROSSCOMPILING' set to TRUE. + # ref: https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html#cross-compiling-for-linux + cat >"${TOOLCHAIN_FILE}" <&2 echo "QEMU is disabled for ${TARGET}" + return + fi + install_qemu + RUN_CMD="${QEMU_INSTALL}/bin/qemu-${QEMU_ARCH} ${QEMU_ARGS[*]}" + + cd "${BUILD_DIR}" || exit 2 + set -x + for test_binary in "${BUILD_DIR}"/list_cpu_feature* ; do + ${RUN_CMD} "${test_binary}" + done + set +x +} + +function usage() { + local -r NAME=$(basename "$0") + echo -e "$NAME - Build using a cross toolchain. + +SYNOPSIS +\t$NAME [-h|--help] [toolchain|build|qemu|test|all] + +DESCRIPTION +\tCross compile using a cross toolchain. + +\tYou MUST define the following variables before running this script: +\t* TARGET: +\t\tx86_64 +\t\taarch64-linux-gnu aarch64_be-linux-gnu +\t\tarm-linux-gnueabihf armv8l-linux-gnueabihf arm-linux-gnueabi +\t\tarmeb-linux-gnueabihf armeb-linux-gnueabi +\t\tmips32 mips32el +\t\tmips64 mips64el + +OPTIONS +\t-h --help: show this help text +\ttoolchain: download, unpack toolchain and generate CMake toolchain file +\tbuild: toolchain + build the project using the toolchain file (note: remove previous build dir) +\tqemu: download, unpack and build qemu +\ttest: qemu + run all executable using qemu (note: don't build !) +\tall: build + test (default) + +EXAMPLES +* Using export: +export TARGET=aarch64-linux-gnu +$0 + +* One-liner: +TARGET=aarch64-linux-gnu $0" +} + +# Main +function main() { + case ${1} in + -h | --help) + usage; exit ;; + esac + + assert_defined TARGET + + declare -r PROJECT_DIR="$(cd -P -- "$(dirname -- "$0")/.." && pwd -P)" + declare -r ARCHIVE_DIR="${PROJECT_DIR}/build_cross/archives" + declare -r BUILD_DIR="${PROJECT_DIR}/build_cross/${TARGET}" + declare -r TOOLCHAIN_FILE=${ARCHIVE_DIR}/toolchain_${TARGET}.cmake + + echo "Target: '${TARGET}'" + + echo "Project dir: '${PROJECT_DIR}'" + echo "Archive dir: '${ARCHIVE_DIR}'" + echo "Build dir: '${BUILD_DIR}'" + echo "toolchain file: '${TOOLCHAIN_FILE}'" + + declare -a CMAKE_DEFAULT_ARGS=( -G ${CMAKE_GENERATOR:-"Ninja"} ) + declare -a CMAKE_ADDITIONAL_ARGS=() + + declare -a QEMU_ARGS=() + case ${TARGET} in + x86_64) + declare -r QEMU_ARCH=x86_64 ;; + arm-linux-gnueabihf | armv8l-linux-gnueabihf | arm-linux-gnueabi) + expand_linaro_config + declare -r QEMU_ARCH=arm ;; + armeb-linux-gnueabihf | armeb-linux-gnueabi) + expand_linaro_config + declare -r QEMU_ARCH=DISABLED ;; + aarch64-linux-gnu) + expand_linaro_config + declare -r QEMU_ARCH=aarch64 ;; + aarch64_be-linux-gnu) + expand_linaro_config + declare -r QEMU_ARCH=DISABLED ;; + mips32) + expand_codescape_config + declare -r QEMU_ARCH=mips ;; + mips32el) + expand_codescape_config + declare -r QEMU_ARCH=mipsel ;; + mips64) + expand_codescape_config + declare -r QEMU_ARCH=mips64 ;; + mips64el) + expand_codescape_config + declare -r QEMU_ARCH=mips64el ;; + *) + >&2 echo "Unknown TARGET '${TARGET}'..." + exit 1 ;; + esac + declare -r QEMU_INSTALL=${ARCHIVE_DIR}/qemu-${QEMU_ARCH} + + case ${1} in + toolchain) + exit ;; + build) + build ;; + qemu) + install_qemu ;; + test) + run_test ;; + *) + build + run_test ;; + esac +} + +main "${1:-all}" diff --git a/cpu_features/scripts/test_integration.sh b/cpu_features/scripts/test_integration.sh new file mode 100755 index 0000000..0947e36 --- /dev/null +++ b/cpu_features/scripts/test_integration.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash + +# Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems +function set_aarch64-linux-gnu() { + export TARGET=aarch64-linux-gnu +} + +# Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_arm-linux-gnueabihf() { + export TARGET=arm-linux-gnueabihf +} + +# Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems +function set_armv8l-linux-gnueabihf() { + export TARGET=armv8l-linux-gnueabihf +} + +# Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_arm-linux-gnueabi() { + export TARGET=arm-linux-gnueabi +} + +# Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems +function set_aarch64_be-linux-gnu() { + export TARGET=aarch64_be-linux-gnu +} + +# Toolchains for big-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_armeb-linux-gnueabihf() { + export TARGET=armeb-linux-gnueabihf +} + +# Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_armeb-linux-gnueabi() { + export TARGET=armeb-linux-gnueabi +} + +function set_mips32() { + export TARGET=mips32 +} + +function set_mips32el() { + export TARGET=mips32el +} + +function set_mips64() { + export TARGET=mips64 +} + +function set_mips64el() { + export TARGET=mips64el +} + +function set_x86_64() { + export TARGET=x86_64 +} + +ENVIRONMENTS=" + set_aarch64-linux-gnu + set_arm-linux-gnueabihf + set_armv8l-linux-gnueabihf + set_arm-linux-gnueabi + set_aarch64_be-linux-gnu + set_armeb-linux-gnueabihf + set_armeb-linux-gnueabi + set_mips32 + set_mips32el + set_mips64 + set_mips64el + set_x86_64 +" + +set -e + +for SET_ENVIRONMENT in ${ENVIRONMENTS}; do + echo "testing ${SET_ENVIRONMENT}" + ${SET_ENVIRONMENT} + ./"$(dirname -- "$0")"/run_integration.sh +done diff --git a/cpu_features/src/copy.inl b/cpu_features/src/copy.inl new file mode 100644 index 0000000..47771d4 --- /dev/null +++ b/cpu_features/src/copy.inl @@ -0,0 +1,19 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +static void copy(char *__restrict dst, const char *src, size_t count) { + for (size_t i = 0; i < count; ++i) dst[i] = src[i]; +} diff --git a/cpu_features/src/define_introspection.inl b/cpu_features/src/define_introspection.inl new file mode 100644 index 0000000..c0eb916 --- /dev/null +++ b/cpu_features/src/define_introspection.inl @@ -0,0 +1,86 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INTROSPECTION_PREFIX +#error "missing INTROSPECTION_PREFIX" +#endif +#ifndef INTROSPECTION_ENUM_PREFIX +#error "missing INTROSPECTION_ENUM_PREFIX" +#endif +#ifndef INTROSPECTION_TABLE +#error "missing INTROSPECTION_TABLE" +#endif + +#include + +#define STRINGIZE_(s) #s +#define STRINGIZE(s) STRINGIZE_(s) + +#define FEAT_TYPE_NAME__(X) X##Features +#define FEAT_TYPE_NAME_(X) FEAT_TYPE_NAME__(X) +#define FEAT_TYPE_NAME FEAT_TYPE_NAME_(INTROSPECTION_PREFIX) + +#define FEAT_ENUM_NAME__(X) X##FeaturesEnum +#define FEAT_ENUM_NAME_(X) FEAT_ENUM_NAME__(X) +#define FEAT_ENUM_NAME FEAT_ENUM_NAME_(INTROSPECTION_PREFIX) + +#define GET_FEAT_ENUM_VALUE__(X) Get##X##FeaturesEnumValue +#define GET_FEAT_ENUM_VALUE_(X) GET_FEAT_ENUM_VALUE__(X) +#define GET_FEAT_ENUM_VALUE GET_FEAT_ENUM_VALUE_(INTROSPECTION_PREFIX) + +#define GET_FEAT_ENUM_NAME__(X) Get##X##FeaturesEnumName +#define GET_FEAT_ENUM_NAME_(X) GET_FEAT_ENUM_NAME__(X) +#define GET_FEAT_ENUM_NAME GET_FEAT_ENUM_NAME_(INTROSPECTION_PREFIX) + +#define FEAT_ENUM_LAST__(X) X##_LAST_ +#define FEAT_ENUM_LAST_(X) FEAT_ENUM_LAST__(X) +#define FEAT_ENUM_LAST FEAT_ENUM_LAST_(INTROSPECTION_ENUM_PREFIX) + +// Generate individual getters and setters. +#define LINE(ENUM, NAME, A, B, C) \ + static void set_##ENUM(FEAT_TYPE_NAME* features, bool value) { \ + features->NAME = value; \ + } \ + static int get_##ENUM(const FEAT_TYPE_NAME* features) { \ + return features->NAME; \ + } +INTROSPECTION_TABLE +#undef LINE + +// Generate getters table +#define LINE(ENUM, NAME, A, B, C) [ENUM] = get_##ENUM, +static int (*const kGetters[])(const FEAT_TYPE_NAME*) = {INTROSPECTION_TABLE}; +#undef LINE + +// Generate setters table +#define LINE(ENUM, NAME, A, B, C) [ENUM] = set_##ENUM, +static void (*const kSetters[])(FEAT_TYPE_NAME*, bool) = {INTROSPECTION_TABLE}; +#undef LINE + +// Implements the `GetXXXFeaturesEnumValue` API. +int GET_FEAT_ENUM_VALUE(const FEAT_TYPE_NAME* features, FEAT_ENUM_NAME value) { + if (value >= FEAT_ENUM_LAST) return false; + return kGetters[value](features); +} + +// Generate feature name table. +#define LINE(ENUM, NAME, A, B, C) [ENUM] = STRINGIZE(NAME), +static const char* kFeatureNames[] = {INTROSPECTION_TABLE}; +#undef LINE + +// Implements the `GetXXXFeaturesEnumName` API. +const char* GET_FEAT_ENUM_NAME(FEAT_ENUM_NAME value) { + if (value >= FEAT_ENUM_LAST) return "unknown_feature"; + return kFeatureNames[value]; +} diff --git a/cpu_features/src/define_introspection_and_hwcaps.inl b/cpu_features/src/define_introspection_and_hwcaps.inl new file mode 100644 index 0000000..c31b60d --- /dev/null +++ b/cpu_features/src/define_introspection_and_hwcaps.inl @@ -0,0 +1,26 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "define_introspection.inl" +#include "internal/hwcaps.h" + +#define LINE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) \ + [ENUM] = (HardwareCapabilities){HWCAP, HWCAP2}, +static const HardwareCapabilities kHardwareCapabilities[] = { + INTROSPECTION_TABLE}; +#undef LINE + +#define LINE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) [ENUM] = CPUINFO_FLAG, +static const char* kCpuInfoFlags[] = {INTROSPECTION_TABLE}; +#undef LINE diff --git a/cpu_features/src/equals.inl b/cpu_features/src/equals.inl new file mode 100644 index 0000000..67a115f --- /dev/null +++ b/cpu_features/src/equals.inl @@ -0,0 +1,22 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +static bool equals(const char *lhs, const char *rhs, size_t count) { + for (size_t i = 0; i < count; ++i) + if (lhs[i] != rhs[i]) return false; + return true; +} diff --git a/cpu_features/src/filesystem.c b/cpu_features/src/filesystem.c new file mode 100644 index 0000000..46c9906 --- /dev/null +++ b/cpu_features/src/filesystem.c @@ -0,0 +1,62 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/filesystem.h" + +#include +#include +#include +#include +#include + +#if defined(CPU_FEATURES_MOCK_FILESYSTEM) +// Implementation will be provided by test/filesystem_for_testing.cc. +#elif defined(_MSC_VER) +#include +int CpuFeatures_OpenFile(const char* filename) { + int fd = -1; + _sopen_s(&fd, filename, _O_RDONLY, _SH_DENYWR, _S_IREAD); + return fd; +} + +void CpuFeatures_CloseFile(int file_descriptor) { _close(file_descriptor); } + +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + return _read(file_descriptor, buffer, (unsigned int)buffer_size); +} + +#else +#include + +int CpuFeatures_OpenFile(const char* filename) { + int result; + do { + result = open(filename, O_RDONLY); + } while (result == -1L && errno == EINTR); + return result; +} + +void CpuFeatures_CloseFile(int file_descriptor) { close(file_descriptor); } + +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + int result; + do { + result = read(file_descriptor, buffer, buffer_size); + } while (result == -1L && errno == EINTR); + return result; +} + +#endif diff --git a/cpu_features/src/hwcaps.c b/cpu_features/src/hwcaps.c new file mode 100644 index 0000000..605f892 --- /dev/null +++ b/cpu_features/src/hwcaps.c @@ -0,0 +1,175 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/hwcaps.h" + +#include +#include + +#include "cpu_features_macros.h" +#include "internal/filesystem.h" +#include "internal/string_view.h" + +static bool IsSet(const uint32_t mask, const uint32_t value) { + if (mask == 0) return false; + return (value & mask) == mask; +} + +bool CpuFeatures_IsHwCapsSet(const HardwareCapabilities hwcaps_mask, + const HardwareCapabilities hwcaps) { + return IsSet(hwcaps_mask.hwcaps, hwcaps.hwcaps) || + IsSet(hwcaps_mask.hwcaps2, hwcaps.hwcaps2); +} + +#ifdef CPU_FEATURES_TEST +// In test mode, hwcaps_for_testing will define the following functions. +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void); +const char* CpuFeatures_GetPlatformPointer(void); +const char* CpuFeatures_GetBasePlatformPointer(void); +#else + +// Debug facilities +#if defined(NDEBUG) +#define D(...) +#else +#include +#define D(...) \ + do { \ + printf(__VA_ARGS__); \ + fflush(stdout); \ + } while (0) +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Implementation of GetElfHwcapFromGetauxval +//////////////////////////////////////////////////////////////////////////////// + +#define AT_HWCAP 16 +#define AT_HWCAP2 26 +#define AT_PLATFORM 15 +#define AT_BASE_PLATFORM 24 + +#if defined(HAVE_STRONG_GETAUXVAL) +#include +static unsigned long GetElfHwcapFromGetauxval(uint32_t hwcap_type) { + return getauxval(hwcap_type); +} +#elif defined(HAVE_DLFCN_H) +// On Android we probe the system's C library for a 'getauxval' function and +// call it if it exits, or return 0 for failure. This function is available +// since API level 20. +// +// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the edge +// case where some NDK developers use headers for a platform that is newer than +// the one really targetted by their application. This is typically done to use +// newer native APIs only when running on more recent Android versions, and +// requires careful symbol management. +// +// Note that getauxval() can't really be re-implemented here, because its +// implementation does not parse /proc/self/auxv. Instead it depends on values +// that are passed by the kernel at process-init time to the C runtime +// initialization layer. + +#include + +typedef unsigned long getauxval_func_t(unsigned long); + +static uint32_t GetElfHwcapFromGetauxval(uint32_t hwcap_type) { + uint32_t ret = 0; + void *libc_handle = NULL; + getauxval_func_t *func = NULL; + + dlerror(); // Cleaning error state before calling dlopen. + libc_handle = dlopen("libc.so", RTLD_NOW); + if (!libc_handle) { + D("Could not dlopen() C library: %s\n", dlerror()); + return 0; + } + func = (getauxval_func_t *)dlsym(libc_handle, "getauxval"); + if (!func) { + D("Could not find getauxval() in C library\n"); + } else { + // Note: getauxval() returns 0 on failure. Doesn't touch errno. + ret = (uint32_t)(*func)(hwcap_type); + } + dlclose(libc_handle); + return ret; +} +#else +#error "This platform does not provide hardware capabilities." +#endif + +// Implementation of GetHardwareCapabilities for OS that provide +// GetElfHwcapFromGetauxval(). + +// Fallback when getauxval is not available, retrieves hwcaps from +// "/proc/self/auxv". +static uint32_t GetElfHwcapFromProcSelfAuxv(uint32_t hwcap_type) { + struct { + uint32_t tag; + uint32_t value; + } entry; + uint32_t result = 0; + const char filepath[] = "/proc/self/auxv"; + const int fd = CpuFeatures_OpenFile(filepath); + if (fd < 0) { + D("Could not open %s\n", filepath); + return 0; + } + for (;;) { + const int ret = CpuFeatures_ReadFile(fd, (char *)&entry, sizeof entry); + if (ret < 0) { + D("Error while reading %s\n", filepath); + break; + } + // Detect end of list. + if (ret == 0 || (entry.tag == 0 && entry.value == 0)) { + break; + } + if (entry.tag == hwcap_type) { + result = entry.value; + break; + } + } + CpuFeatures_CloseFile(fd); + return result; +} + +// Retrieves hardware capabilities by first trying to call getauxval, if not +// available falls back to reading "/proc/self/auxv". +static unsigned long GetHardwareCapabilitiesFor(uint32_t type) { + unsigned long hwcaps = GetElfHwcapFromGetauxval(type); + if (!hwcaps) { + D("Parsing /proc/self/auxv to extract ELF hwcaps!\n"); + hwcaps = GetElfHwcapFromProcSelfAuxv(type); + } + return hwcaps; +} + +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) { + HardwareCapabilities capabilities; + capabilities.hwcaps = GetHardwareCapabilitiesFor(AT_HWCAP); + capabilities.hwcaps2 = GetHardwareCapabilitiesFor(AT_HWCAP2); + return capabilities; +} + +const char *CpuFeatures_GetPlatformPointer(void) { + return (const char *)GetHardwareCapabilitiesFor(AT_PLATFORM); +} + +const char *CpuFeatures_GetBasePlatformPointer(void) { + return (const char *)GetHardwareCapabilitiesFor(AT_BASE_PLATFORM); +} + +#endif // CPU_FEATURES_TEST diff --git a/cpu_features/src/impl_aarch64_linux_or_android.c b/cpu_features/src/impl_aarch64_linux_or_android.c new file mode 100644 index 0000000..745beb9 --- /dev/null +++ b/cpu_features/src/impl_aarch64_linux_or_android.c @@ -0,0 +1,150 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_AARCH64 +#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + +#include "cpuinfo_aarch64.h" + +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. +//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(AARCH64_FP, fp, "fp", AARCH64_HWCAP_FP, 0) \ + LINE(AARCH64_ASIMD, asimd, "asimd", AARCH64_HWCAP_ASIMD, 0) \ + LINE(AARCH64_EVTSTRM, evtstrm, "evtstrm", AARCH64_HWCAP_EVTSTRM, 0) \ + LINE(AARCH64_AES, aes, "aes", AARCH64_HWCAP_AES, 0) \ + LINE(AARCH64_PMULL, pmull, "pmull", AARCH64_HWCAP_PMULL, 0) \ + LINE(AARCH64_SHA1, sha1, "sha1", AARCH64_HWCAP_SHA1, 0) \ + LINE(AARCH64_SHA2, sha2, "sha2", AARCH64_HWCAP_SHA2, 0) \ + LINE(AARCH64_CRC32, crc32, "crc32", AARCH64_HWCAP_CRC32, 0) \ + LINE(AARCH64_ATOMICS, atomics, "atomics", AARCH64_HWCAP_ATOMICS, 0) \ + LINE(AARCH64_FPHP, fphp, "fphp", AARCH64_HWCAP_FPHP, 0) \ + LINE(AARCH64_ASIMDHP, asimdhp, "asimdhp", AARCH64_HWCAP_ASIMDHP, 0) \ + LINE(AARCH64_CPUID, cpuid, "cpuid", AARCH64_HWCAP_CPUID, 0) \ + LINE(AARCH64_ASIMDRDM, asimdrdm, "asimdrdm", AARCH64_HWCAP_ASIMDRDM, 0) \ + LINE(AARCH64_JSCVT, jscvt, "jscvt", AARCH64_HWCAP_JSCVT, 0) \ + LINE(AARCH64_FCMA, fcma, "fcma", AARCH64_HWCAP_FCMA, 0) \ + LINE(AARCH64_LRCPC, lrcpc, "lrcpc", AARCH64_HWCAP_LRCPC, 0) \ + LINE(AARCH64_DCPOP, dcpop, "dcpop", AARCH64_HWCAP_DCPOP, 0) \ + LINE(AARCH64_SHA3, sha3, "sha3", AARCH64_HWCAP_SHA3, 0) \ + LINE(AARCH64_SM3, sm3, "sm3", AARCH64_HWCAP_SM3, 0) \ + LINE(AARCH64_SM4, sm4, "sm4", AARCH64_HWCAP_SM4, 0) \ + LINE(AARCH64_ASIMDDP, asimddp, "asimddp", AARCH64_HWCAP_ASIMDDP, 0) \ + LINE(AARCH64_SHA512, sha512, "sha512", AARCH64_HWCAP_SHA512, 0) \ + LINE(AARCH64_SVE, sve, "sve", AARCH64_HWCAP_SVE, 0) \ + LINE(AARCH64_ASIMDFHM, asimdfhm, "asimdfhm", AARCH64_HWCAP_ASIMDFHM, 0) \ + LINE(AARCH64_DIT, dit, "dit", AARCH64_HWCAP_DIT, 0) \ + LINE(AARCH64_USCAT, uscat, "uscat", AARCH64_HWCAP_USCAT, 0) \ + LINE(AARCH64_ILRCPC, ilrcpc, "ilrcpc", AARCH64_HWCAP_ILRCPC, 0) \ + LINE(AARCH64_FLAGM, flagm, "flagm", AARCH64_HWCAP_FLAGM, 0) \ + LINE(AARCH64_SSBS, ssbs, "ssbs", AARCH64_HWCAP_SSBS, 0) \ + LINE(AARCH64_SB, sb, "sb", AARCH64_HWCAP_SB, 0) \ + LINE(AARCH64_PACA, paca, "paca", AARCH64_HWCAP_PACA, 0) \ + LINE(AARCH64_PACG, pacg, "pacg", AARCH64_HWCAP_PACG, 0) \ + LINE(AARCH64_DCPODP, dcpodp, "dcpodp", 0, AARCH64_HWCAP2_DCPODP) \ + LINE(AARCH64_SVE2, sve2, "sve2", 0, AARCH64_HWCAP2_SVE2) \ + LINE(AARCH64_SVEAES, sveaes, "sveaes", 0, AARCH64_HWCAP2_SVEAES) \ + LINE(AARCH64_SVEPMULL, svepmull, "svepmull", 0, AARCH64_HWCAP2_SVEPMULL) \ + LINE(AARCH64_SVEBITPERM, svebitperm, "svebitperm", 0, \ + AARCH64_HWCAP2_SVEBITPERM) \ + LINE(AARCH64_SVESHA3, svesha3, "svesha3", 0, AARCH64_HWCAP2_SVESHA3) \ + LINE(AARCH64_SVESM4, svesm4, "svesm4", 0, AARCH64_HWCAP2_SVESM4) \ + LINE(AARCH64_FLAGM2, flagm2, "flagm2", 0, AARCH64_HWCAP2_FLAGM2) \ + LINE(AARCH64_FRINT, frint, "frint", 0, AARCH64_HWCAP2_FRINT) \ + LINE(AARCH64_SVEI8MM, svei8mm, "svei8mm", 0, AARCH64_HWCAP2_SVEI8MM) \ + LINE(AARCH64_SVEF32MM, svef32mm, "svef32mm", 0, AARCH64_HWCAP2_SVEF32MM) \ + LINE(AARCH64_SVEF64MM, svef64mm, "svef64mm", 0, AARCH64_HWCAP2_SVEF64MM) \ + LINE(AARCH64_SVEBF16, svebf16, "svebf16", 0, AARCH64_HWCAP2_SVEBF16) \ + LINE(AARCH64_I8MM, i8mm, "i8mm", 0, AARCH64_HWCAP2_I8MM) \ + LINE(AARCH64_BF16, bf16, "bf16", 0, AARCH64_HWCAP2_BF16) \ + LINE(AARCH64_DGH, dgh, "dgh", 0, AARCH64_HWCAP2_DGH) \ + LINE(AARCH64_RNG, rng, "rng", 0, AARCH64_HWCAP2_RNG) \ + LINE(AARCH64_BTI, bti, "bti", 0, AARCH64_HWCAP2_BTI) \ + LINE(AARCH64_MTE, mte, "mte", 0, AARCH64_HWCAP2_MTE) +#define INTROSPECTION_PREFIX Aarch64 +#define INTROSPECTION_ENUM_PREFIX AARCH64 +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include "internal/bit_utils.h" +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +static bool HandleAarch64Line(const LineResult result, + Aarch64Info* const info) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("Features"))) { + for (size_t i = 0; i < AARCH64_LAST_; ++i) { + kSetters[i](&info->features, CpuFeatures_StringView_HasWord( + value, kCpuInfoFlags[i], ' ')); + } + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) { + info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) { + info->variant = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) { + info->part = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) { + info->revision = CpuFeatures_StringView_ParsePositiveNumber(value); + } + } + return !result.eof; +} + +static void FillProcCpuInfoData(Aarch64Info* const info) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleAarch64Line(StackLineReader_NextLine(&reader), info)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const Aarch64Info kEmptyAarch64Info; + +Aarch64Info GetAarch64Info(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). + Aarch64Info info = kEmptyAarch64Info; + + FillProcCpuInfoData(&info); + const HardwareCapabilities hwcaps = CpuFeatures_GetHardwareCapabilities(); + for (size_t i = 0; i < AARCH64_LAST_; ++i) { + if (CpuFeatures_IsHwCapsSet(kHardwareCapabilities[i], hwcaps)) { + kSetters[i](&info.features, true); + } + } + + return info; +} + +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_AARCH64 diff --git a/cpu_features/src/impl_arm_linux_or_android.c b/cpu_features/src/impl_arm_linux_or_android.c new file mode 100644 index 0000000..d110a15 --- /dev/null +++ b/cpu_features/src/impl_arm_linux_or_android.c @@ -0,0 +1,211 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_ARM +#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + +#include "cpuinfo_arm.h" + +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. +//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(ARM_SWP, swp, "swp", ARM_HWCAP_SWP, 0) \ + LINE(ARM_HALF, half, "half", ARM_HWCAP_HALF, 0) \ + LINE(ARM_THUMB, thumb, "thumb", ARM_HWCAP_THUMB, 0) \ + LINE(ARM_26BIT, _26bit, "26bit", ARM_HWCAP_26BIT, 0) \ + LINE(ARM_FASTMULT, fastmult, "fastmult", ARM_HWCAP_FAST_MULT, 0) \ + LINE(ARM_FPA, fpa, "fpa", ARM_HWCAP_FPA, 0) \ + LINE(ARM_VFP, vfp, "vfp", ARM_HWCAP_VFP, 0) \ + LINE(ARM_EDSP, edsp, "edsp", ARM_HWCAP_EDSP, 0) \ + LINE(ARM_JAVA, java, "java", ARM_HWCAP_JAVA, 0) \ + LINE(ARM_IWMMXT, iwmmxt, "iwmmxt", ARM_HWCAP_IWMMXT, 0) \ + LINE(ARM_CRUNCH, crunch, "crunch", ARM_HWCAP_CRUNCH, 0) \ + LINE(ARM_THUMBEE, thumbee, "thumbee", ARM_HWCAP_THUMBEE, 0) \ + LINE(ARM_NEON, neon, "neon", ARM_HWCAP_NEON, 0) \ + LINE(ARM_VFPV3, vfpv3, "vfpv3", ARM_HWCAP_VFPV3, 0) \ + LINE(ARM_VFPV3D16, vfpv3d16, "vfpv3d16", ARM_HWCAP_VFPV3D16, 0) \ + LINE(ARM_TLS, tls, "tls", ARM_HWCAP_TLS, 0) \ + LINE(ARM_VFPV4, vfpv4, "vfpv4", ARM_HWCAP_VFPV4, 0) \ + LINE(ARM_IDIVA, idiva, "idiva", ARM_HWCAP_IDIVA, 0) \ + LINE(ARM_IDIVT, idivt, "idivt", ARM_HWCAP_IDIVT, 0) \ + LINE(ARM_VFPD32, vfpd32, "vfpd32", ARM_HWCAP_VFPD32, 0) \ + LINE(ARM_LPAE, lpae, "lpae", ARM_HWCAP_LPAE, 0) \ + LINE(ARM_EVTSTRM, evtstrm, "evtstrm", ARM_HWCAP_EVTSTRM, 0) \ + LINE(ARM_AES, aes, "aes", 0, ARM_HWCAP2_AES) \ + LINE(ARM_PMULL, pmull, "pmull", 0, ARM_HWCAP2_PMULL) \ + LINE(ARM_SHA1, sha1, "sha1", 0, ARM_HWCAP2_SHA1) \ + LINE(ARM_SHA2, sha2, "sha2", 0, ARM_HWCAP2_SHA2) \ + LINE(ARM_CRC32, crc32, "crc32", 0, ARM_HWCAP2_CRC32) +#define INTROSPECTION_PREFIX Arm +#define INTROSPECTION_ENUM_PREFIX ARM +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. +//////////////////////////////////////////////////////////////////////////////// + +#include +#include + +#include "internal/bit_utils.h" +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +typedef struct { + bool processor_reports_armv6; + bool hardware_reports_goldfish; +} ProcCpuInfoData; + +static int IndexOfNonDigit(StringView str) { + size_t index = 0; + while (str.size && isdigit(CpuFeatures_StringView_Front(str))) { + str = CpuFeatures_StringView_PopFront(str, 1); + ++index; + } + return index; +} + +static bool HandleArmLine(const LineResult result, ArmInfo* const info, + ProcCpuInfoData* const proc_info) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("Features"))) { + for (size_t i = 0; i < ARM_LAST_; ++i) { + kSetters[i](&info->features, CpuFeatures_StringView_HasWord( + value, kCpuInfoFlags[i], ' ')); + } + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) { + info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) { + info->variant = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) { + info->part = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) { + info->revision = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU architecture"))) { + // CPU architecture is a number that may be followed by letters. e.g. + // "6TEJ", "7". + const StringView digits = + CpuFeatures_StringView_KeepFront(value, IndexOfNonDigit(value)); + info->architecture = CpuFeatures_StringView_ParsePositiveNumber(digits); + } else if (CpuFeatures_StringView_IsEquals(key, str("Processor")) || + CpuFeatures_StringView_IsEquals(key, str("model name"))) { + // Android reports this in a non-Linux standard "Processor" but sometimes + // also in "model name", Linux reports it only in "model name" + // see RaspberryPiZero (Linux) vs InvalidArmv7 (Android) test-cases + proc_info->processor_reports_armv6 = + CpuFeatures_StringView_IndexOf(value, str("(v6l)")) >= 0; + } else if (CpuFeatures_StringView_IsEquals(key, str("Hardware"))) { + proc_info->hardware_reports_goldfish = + CpuFeatures_StringView_IsEquals(value, str("Goldfish")); + } + } + return !result.eof; +} + +uint32_t GetArmCpuId(const ArmInfo* const info) { + return (ExtractBitRange(info->implementer, 7, 0) << 24) | + (ExtractBitRange(info->variant, 3, 0) << 20) | + (ExtractBitRange(info->part, 11, 0) << 4) | + (ExtractBitRange(info->revision, 3, 0) << 0); +} + +static void FixErrors(ArmInfo* const info, + ProcCpuInfoData* const proc_cpu_info_data) { + // Fixing Samsung kernel reporting invalid cpu architecture. + // http://code.google.com/p/android/issues/detail?id=10812 + if (proc_cpu_info_data->processor_reports_armv6 && info->architecture >= 7) { + info->architecture = 6; + } + + // Handle kernel configuration bugs that prevent the correct reporting of CPU + // features. + switch (GetArmCpuId(info)) { + case 0x4100C080: + // Special case: The emulator-specific Android 4.2 kernel fails to report + // support for the 32-bit ARM IDIV instruction. Technically, this is a + // feature of the virtual CPU implemented by the emulator. Note that it + // could also support Thumb IDIV in the future, and this will have to be + // slightly updated. + if (info->architecture >= 7 && + proc_cpu_info_data->hardware_reports_goldfish) { + info->features.idiva = true; + } + break; + case 0x511004D0: + // https://crbug.com/341598. + info->features.neon = false; + break; + case 0x510006F2: + case 0x510006F3: + // The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report + // IDIV support. + info->features.idiva = true; + info->features.idivt = true; + break; + } + + // Propagate cpu features. + if (info->features.vfpv4) info->features.vfpv3 = true; + if (info->features.neon) info->features.vfpv3 = true; + if (info->features.vfpv3) info->features.vfp = true; +} + +static void FillProcCpuInfoData(ArmInfo* const info, + ProcCpuInfoData* proc_cpu_info_data) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleArmLine(StackLineReader_NextLine(&reader), info, + proc_cpu_info_data)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const ArmInfo kEmptyArmInfo; + +static const ProcCpuInfoData kEmptyProcCpuInfoData; + +ArmInfo GetArmInfo(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). + ArmInfo info = kEmptyArmInfo; + ProcCpuInfoData proc_cpu_info_data = kEmptyProcCpuInfoData; + + FillProcCpuInfoData(&info, &proc_cpu_info_data); + const HardwareCapabilities hwcaps = CpuFeatures_GetHardwareCapabilities(); + for (size_t i = 0; i < ARM_LAST_; ++i) { + if (CpuFeatures_IsHwCapsSet(kHardwareCapabilities[i], hwcaps)) { + kSetters[i](&info.features, true); + } + } + + FixErrors(&info, &proc_cpu_info_data); + + return info; +} + +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_ARM diff --git a/cpu_features/src/impl_mips_linux_or_android.c b/cpu_features/src/impl_mips_linux_or_android.c new file mode 100644 index 0000000..9a3dc2f --- /dev/null +++ b/cpu_features/src/impl_mips_linux_or_android.c @@ -0,0 +1,88 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_MIPS +#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + +#include "cpuinfo_mips.h" + +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. +//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(MIPS_MSA, msa, "msa", MIPS_HWCAP_MSA, 0) \ + LINE(MIPS_EVA, eva, "eva", 0, 0) \ + LINE(MIPS_R6, r6, "r6", MIPS_HWCAP_R6, 0) +#define INTROSPECTION_PREFIX Mips +#define INTROSPECTION_ENUM_PREFIX MIPS +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. +//////////////////////////////////////////////////////////////////////////////// + +#include "internal/filesystem.h" +#include "internal/hwcaps.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +static bool HandleMipsLine(const LineResult result, + MipsFeatures* const features) { + StringView key, value; + // See tests for an example. + if (CpuFeatures_StringView_GetAttributeKeyValue(result.line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("ASEs implemented"))) { + for (size_t i = 0; i < MIPS_LAST_; ++i) { + kSetters[i](features, CpuFeatures_StringView_HasWord( + value, kCpuInfoFlags[i], ' ')); + } + } + } + return !result.eof; +} + +static void FillProcCpuInfoData(MipsFeatures* const features) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleMipsLine(StackLineReader_NextLine(&reader), features)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const MipsInfo kEmptyMipsInfo; + +MipsInfo GetMipsInfo(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). + MipsInfo info = kEmptyMipsInfo; + + FillProcCpuInfoData(&info.features); + const HardwareCapabilities hwcaps = CpuFeatures_GetHardwareCapabilities(); + for (size_t i = 0; i < MIPS_LAST_; ++i) { + if (CpuFeatures_IsHwCapsSet(kHardwareCapabilities[i], hwcaps)) { + kSetters[i](&info.features, true); + } + } + return info; +} + +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_MIPS diff --git a/cpu_features/src/impl_ppc_linux.c b/cpu_features/src/impl_ppc_linux.c new file mode 100644 index 0000000..13a381a --- /dev/null +++ b/cpu_features/src/impl_ppc_linux.c @@ -0,0 +1,162 @@ +// Copyright 2018 IBM. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_PPC +#ifdef CPU_FEATURES_OS_LINUX + +#include "cpuinfo_ppc.h" + +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. +//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(PPC_32, ppc32, "ppc32", PPC_FEATURE_32, 0) \ + LINE(PPC_64, ppc64, "ppc64", PPC_FEATURE_64, 0) \ + LINE(PPC_601_INSTR, ppc601, "ppc601", PPC_FEATURE_601_INSTR, 0) \ + LINE(PPC_HAS_ALTIVEC, altivec, "altivec", PPC_FEATURE_HAS_ALTIVEC, 0) \ + LINE(PPC_HAS_FPU, fpu, "fpu", PPC_FEATURE_HAS_FPU, 0) \ + LINE(PPC_HAS_MMU, mmu, "mmu", PPC_FEATURE_HAS_MMU, 0) \ + LINE(PPC_HAS_4xxMAC, mac_4xx, "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0) \ + LINE(PPC_UNIFIED_CACHE, unifiedcache, "ucache", PPC_FEATURE_UNIFIED_CACHE, \ + 0) \ + LINE(PPC_HAS_SPE, spe, "spe", PPC_FEATURE_HAS_SPE, 0) \ + LINE(PPC_HAS_EFP_SINGLE, efpsingle, "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, \ + 0) \ + LINE(PPC_HAS_EFP_DOUBLE, efpdouble, "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, \ + 0) \ + LINE(PPC_NO_TB, no_tb, "notb", PPC_FEATURE_NO_TB, 0) \ + LINE(PPC_POWER4, power4, "power4", PPC_FEATURE_POWER4, 0) \ + LINE(PPC_POWER5, power5, "power5", PPC_FEATURE_POWER5, 0) \ + LINE(PPC_POWER5_PLUS, power5plus, "power5+", PPC_FEATURE_POWER5_PLUS, 0) \ + LINE(PPC_CELL, cell, "cellbe", PPC_FEATURE_CELL, 0) \ + LINE(PPC_BOOKE, booke, "booke", PPC_FEATURE_BOOKE, 0) \ + LINE(PPC_SMT, smt, "smt", PPC_FEATURE_SMT, 0) \ + LINE(PPC_ICACHE_SNOOP, icachesnoop, "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0) \ + LINE(PPC_ARCH_2_05, arch205, "arch_2_05", PPC_FEATURE_ARCH_2_05, 0) \ + LINE(PPC_PA6T, pa6t, "pa6t", PPC_FEATURE_PA6T, 0) \ + LINE(PPC_HAS_DFP, dfp, "dfp", PPC_FEATURE_HAS_DFP, 0) \ + LINE(PPC_POWER6_EXT, power6ext, "power6x", PPC_FEATURE_POWER6_EXT, 0) \ + LINE(PPC_ARCH_2_06, arch206, "arch_2_06", PPC_FEATURE_ARCH_2_06, 0) \ + LINE(PPC_HAS_VSX, vsx, "vsx", PPC_FEATURE_HAS_VSX, 0) \ + LINE(PPC_PSERIES_PERFMON_COMPAT, pseries_perfmon_compat, "archpmu", \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT, 0) \ + LINE(PPC_TRUE_LE, truele, "true_le", PPC_FEATURE_TRUE_LE, 0) \ + LINE(PPC_PPC_LE, ppcle, "ppcle", PPC_FEATURE_PPC_LE, 0) \ + LINE(PPC_ARCH_2_07, arch207, "arch_2_07", 0, PPC_FEATURE2_ARCH_2_07) \ + LINE(PPC_HTM, htm, "htm", 0, PPC_FEATURE2_HTM) \ + LINE(PPC_DSCR, dscr, "dscr", 0, PPC_FEATURE2_DSCR) \ + LINE(PPC_EBB, ebb, "ebb", 0, PPC_FEATURE2_EBB) \ + LINE(PPC_ISEL, isel, "isel", 0, PPC_FEATURE2_ISEL) \ + LINE(PPC_TAR, tar, "tar", 0, PPC_FEATURE2_TAR) \ + LINE(PPC_VEC_CRYPTO, vcrypto, "vcrypto", 0, PPC_FEATURE2_VEC_CRYPTO) \ + LINE(PPC_HTM_NOSC, htm_nosc, "htm-nosc", 0, PPC_FEATURE2_HTM_NOSC) \ + LINE(PPC_ARCH_3_00, arch300, "arch_3_00", 0, PPC_FEATURE2_ARCH_3_00) \ + LINE(PPC_HAS_IEEE128, ieee128, "ieee128", 0, PPC_FEATURE2_HAS_IEEE128) \ + LINE(PPC_DARN, darn, "darn", 0, PPC_FEATURE2_DARN) \ + LINE(PPC_SCV, scv, "scv", 0, PPC_FEATURE2_SCV) \ + LINE(PPC_HTM_NO_SUSPEND, htm_no_suspend, "htm-no-suspend", 0, \ + PPC_FEATURE2_HTM_NO_SUSPEND) +#define INTROSPECTION_PREFIX PPC +#define INTROSPECTION_ENUM_PREFIX PPC +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include "internal/bit_utils.h" +#include "internal/filesystem.h" +#include "internal/hwcaps.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +static bool HandlePPCLine(const LineResult result, + PPCPlatformStrings* const strings) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_HasWord(key, "platform", ' ')) { + CpuFeatures_StringView_CopyString(value, strings->platform, + sizeof(strings->platform)); + } else if (CpuFeatures_StringView_IsEquals(key, str("model"))) { + CpuFeatures_StringView_CopyString(value, strings->model, + sizeof(strings->platform)); + } else if (CpuFeatures_StringView_IsEquals(key, str("machine"))) { + CpuFeatures_StringView_CopyString(value, strings->machine, + sizeof(strings->platform)); + } else if (CpuFeatures_StringView_IsEquals(key, str("cpu"))) { + CpuFeatures_StringView_CopyString(value, strings->cpu, + sizeof(strings->platform)); + } + } + return !result.eof; +} + +static void FillProcCpuInfoData(PPCPlatformStrings* const strings) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandlePPCLine(StackLineReader_NextLine(&reader), strings)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const PPCInfo kEmptyPPCInfo; + +PPCInfo GetPPCInfo(void) { + /* + * On Power feature flags aren't currently in cpuinfo so we only look at + * the auxilary vector. + */ + PPCInfo info = kEmptyPPCInfo; + const HardwareCapabilities hwcaps = CpuFeatures_GetHardwareCapabilities(); + for (size_t i = 0; i < PPC_LAST_; ++i) { + if (CpuFeatures_IsHwCapsSet(kHardwareCapabilities[i], hwcaps)) { + kSetters[i](&info.features, true); + } + } + return info; +} + +static const PPCPlatformStrings kEmptyPPCPlatformStrings; + +PPCPlatformStrings GetPPCPlatformStrings(void) { + PPCPlatformStrings strings = kEmptyPPCPlatformStrings; + const char* platform = CpuFeatures_GetPlatformPointer(); + const char* base_platform = CpuFeatures_GetBasePlatformPointer(); + + FillProcCpuInfoData(&strings); + + if (platform != NULL) + CpuFeatures_StringView_CopyString(str(platform), strings.type.platform, + sizeof(strings.type.platform)); + if (base_platform != NULL) + CpuFeatures_StringView_CopyString(str(base_platform), + strings.type.base_platform, + sizeof(strings.type.base_platform)); + + return strings; +} + +#endif // CPU_FEATURES_OS_LINUX +#endif // CPU_FEATURES_ARCH_PPC diff --git a/cpu_features/src/impl_x86__base_implementation.inl b/cpu_features/src/impl_x86__base_implementation.inl new file mode 100644 index 0000000..0ac6609 --- /dev/null +++ b/cpu_features/src/impl_x86__base_implementation.inl @@ -0,0 +1,1724 @@ +// Copyright 2017 Google LLC +// Copyright 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "copy.inl" +#include "cpuinfo_x86.h" +#include "equals.inl" +#include "internal/bit_utils.h" +#include "internal/cpuid_x86.h" + +#if !defined(CPU_FEATURES_ARCH_X86) +#error "Cannot compile cpuinfo_x86 on a non x86 platform." +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Definitions for CpuId and GetXCR0Eax. +//////////////////////////////////////////////////////////////////////////////// + +#if defined(CPU_FEATURES_MOCK_CPUID_X86) +// Implementation will be provided by test/cpuinfo_x86_test.cc. +#elif defined(CPU_FEATURES_COMPILER_CLANG) || defined(CPU_FEATURES_COMPILER_GCC) + +#include + +Leaf GetCpuidLeaf(uint32_t leaf_id, int ecx) { + Leaf leaf; + __cpuid_count(leaf_id, ecx, leaf.eax, leaf.ebx, leaf.ecx, leaf.edx); + return leaf; +} + +uint32_t GetXCR0Eax(void) { + uint32_t eax, edx; + /* named form of xgetbv not supported on OSX, so must use byte form, see: + https://github.com/asmjit/asmjit/issues/78 + */ + __asm(".byte 0x0F, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0)); + return eax; +} + +#elif defined(CPU_FEATURES_COMPILER_MSC) + +#include +#include // For __cpuidex() + +Leaf GetCpuidLeaf(uint32_t leaf_id, int ecx) { + Leaf leaf; + int data[4]; + __cpuidex(data, leaf_id, ecx); + leaf.eax = data[0]; + leaf.ebx = data[1]; + leaf.ecx = data[2]; + leaf.edx = data[3]; + return leaf; +} + +uint32_t GetXCR0Eax(void) { return (uint32_t)_xgetbv(0); } + +#else +#error "Unsupported compiler, x86 cpuid requires either GCC, Clang or MSVC." +#endif + +static Leaf CpuId(uint32_t leaf_id) { return GetCpuidLeaf(leaf_id, 0); } + +static const Leaf kEmptyLeaf; + +static Leaf SafeCpuIdEx(uint32_t max_cpuid_leaf, uint32_t leaf_id, int ecx) { + if (leaf_id <= max_cpuid_leaf) { + return GetCpuidLeaf(leaf_id, ecx); + } else { + return kEmptyLeaf; + } +} + +static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) { + return SafeCpuIdEx(max_cpuid_leaf, leaf_id, 0); +} + +//////////////////////////////////////////////////////////////////////////////// +// OS support +// TODO: Add documentation +//////////////////////////////////////////////////////////////////////////////// + +#define MASK_XMM 0x2 +#define MASK_YMM 0x4 +#define MASK_MASKREG 0x20 +#define MASK_ZMM0_15 0x40 +#define MASK_ZMM16_31 0x80 +#define MASK_XTILECFG 0x20000 +#define MASK_XTILEDATA 0x40000 + +static bool HasMask(uint32_t value, uint32_t mask) { + return (value & mask) == mask; +} + +// Checks that operating system saves and restores xmm registers during context +// switches. +static bool HasXmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM); +} + +// Checks that operating system saves and restores ymm registers during context +// switches. +static bool HasYmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM | MASK_YMM); +} + +// Checks that operating system saves and restores zmm registers during context +// switches. +static bool HasZmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 | + MASK_ZMM16_31); +} + +// Checks that operating system saves and restores AMX/TMUL state during context +// switches. +static bool HasTmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 | + MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA); +} + +//////////////////////////////////////////////////////////////////////////////// +// Vendor +//////////////////////////////////////////////////////////////////////////////// + +static void SetVendor(const Leaf leaf, char* const vendor) { + *(uint32_t*)(vendor) = leaf.ebx; + *(uint32_t*)(vendor + 4) = leaf.edx; + *(uint32_t*)(vendor + 8) = leaf.ecx; + vendor[12] = '\0'; +} + +static int IsVendor(const Leaf leaf, const char* const name) { + const uint32_t ebx = *(const uint32_t*)(name); + const uint32_t edx = *(const uint32_t*)(name + 4); + const uint32_t ecx = *(const uint32_t*)(name + 8); + return leaf.ebx == ebx && leaf.ecx == ecx && leaf.edx == edx; +} + +static int IsVendorByX86Info(const X86Info* info, const char* const name) { + return equals(info->vendor, name, sizeof(info->vendor)); +} + +void FillX86BrandString(char brand_string[49]) { + const Leaf leaf_ext_0 = CpuId(0x80000000); + const uint32_t max_cpuid_leaf_ext = leaf_ext_0.eax; + const Leaf leaves[3] = { + SafeCpuId(max_cpuid_leaf_ext, 0x80000002), + SafeCpuId(max_cpuid_leaf_ext, 0x80000003), + SafeCpuId(max_cpuid_leaf_ext, 0x80000004), + }; +#if __STDC_VERSION__ >= 201112L + _Static_assert(sizeof(leaves) == 48, "Leaves must be packed"); +#endif + copy(brand_string, (const char*)(leaves), 48); + brand_string[48] = '\0'; +} + +//////////////////////////////////////////////////////////////////////////////// +// CpuId +//////////////////////////////////////////////////////////////////////////////// + +static bool HasSecondFMA(uint32_t model) { + // Skylake server + if (model == 0x55) { + char proc_name[49] = {0}; + FillX86BrandString(proc_name); + // detect Xeon + if (proc_name[9] == 'X') { + // detect Silver or Bronze + if (proc_name[17] == 'S' || proc_name[17] == 'B') return false; + // detect Gold 5_20 and below, except for Gold 53__ + if (proc_name[17] == 'G' && proc_name[22] == '5') + return ((proc_name[23] == '3') || + (proc_name[24] == '2' && proc_name[25] == '2')); + // detect Xeon W 210x + if (proc_name[17] == 'W' && proc_name[21] == '0') return false; + // detect Xeon D 2xxx + if (proc_name[17] == 'D' && proc_name[19] == '2' && proc_name[20] == '1') + return false; + } + return true; + } + // Cannon Lake client + if (model == 0x66) return false; + // Ice Lake client + if (model == 0x7d || model == 0x7e) return false; + // This is the right default... + return true; +} + +// Internal structure to hold the OS support for vector operations. +// Avoid to recompute them since each call to cpuid is ~100 cycles. +typedef struct { + bool sse_registers; + bool avx_registers; + bool avx512_registers; + bool amx_registers; +} OsPreserves; + +// These two functions have to be implemented by the OS, that is the file +// including this file. +static void OverrideOsPreserves(OsPreserves* os_preserves); +static void DetectFeaturesFromOs(X86Features* features); + +// Reference https://en.wikipedia.org/wiki/CPUID. +static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, + OsPreserves* os_preserves) { + const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); + const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); + const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1); + + const bool have_xsave = IsBitSet(leaf_1.ecx, 26); + const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); + const bool have_xcr0 = have_xsave && have_osxsave; + + const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); + const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); + const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4); + const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16); + + X86Features* const features = &info->features; + + info->family = extended_family + family; + info->model = (extended_model << 4) + model; + info->stepping = ExtractBitRange(leaf_1.eax, 3, 0); + + features->fpu = IsBitSet(leaf_1.edx, 0); + features->tsc = IsBitSet(leaf_1.edx, 4); + features->cx8 = IsBitSet(leaf_1.edx, 8); + features->clfsh = IsBitSet(leaf_1.edx, 19); + features->mmx = IsBitSet(leaf_1.edx, 23); + features->ss = IsBitSet(leaf_1.edx, 27); + features->pclmulqdq = IsBitSet(leaf_1.ecx, 1); + features->smx = IsBitSet(leaf_1.ecx, 6); + features->cx16 = IsBitSet(leaf_1.ecx, 13); + features->dca = IsBitSet(leaf_1.ecx, 18); + features->movbe = IsBitSet(leaf_1.ecx, 22); + features->popcnt = IsBitSet(leaf_1.ecx, 23); + features->aes = IsBitSet(leaf_1.ecx, 25); + features->f16c = IsBitSet(leaf_1.ecx, 29); + features->rdrnd = IsBitSet(leaf_1.ecx, 30); + features->sgx = IsBitSet(leaf_7.ebx, 2); + features->bmi1 = IsBitSet(leaf_7.ebx, 3); + features->hle = IsBitSet(leaf_7.ebx, 4); + features->bmi2 = IsBitSet(leaf_7.ebx, 8); + features->erms = IsBitSet(leaf_7.ebx, 9); + features->rtm = IsBitSet(leaf_7.ebx, 11); + features->rdseed = IsBitSet(leaf_7.ebx, 18); + features->clflushopt = IsBitSet(leaf_7.ebx, 23); + features->clwb = IsBitSet(leaf_7.ebx, 24); + features->sha = IsBitSet(leaf_7.ebx, 29); + features->vaes = IsBitSet(leaf_7.ecx, 9); + features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10); + features->adx = IsBitSet(leaf_7.ebx, 19); + + ///////////////////////////////////////////////////////////////////////////// + // The following section is devoted to Vector Extensions. + ///////////////////////////////////////////////////////////////////////////// + + // CPU with AVX expose XCR0 which enables checking vector extensions OS + // support through cpuid. + if (have_xcr0) { + // Here we rely exclusively on cpuid for both CPU and OS support of vector + // extensions. + const uint32_t xcr0_eax = GetXCR0Eax(); + os_preserves->sse_registers = HasXmmOsXSave(xcr0_eax); + os_preserves->avx_registers = HasYmmOsXSave(xcr0_eax); + os_preserves->avx512_registers = HasZmmOsXSave(xcr0_eax); + os_preserves->amx_registers = HasTmmOsXSave(xcr0_eax); + OverrideOsPreserves(os_preserves); + + if (os_preserves->sse_registers) { + features->sse = IsBitSet(leaf_1.edx, 25); + features->sse2 = IsBitSet(leaf_1.edx, 26); + features->sse3 = IsBitSet(leaf_1.ecx, 0); + features->ssse3 = IsBitSet(leaf_1.ecx, 9); + features->sse4_1 = IsBitSet(leaf_1.ecx, 19); + features->sse4_2 = IsBitSet(leaf_1.ecx, 20); + } + if (os_preserves->avx_registers) { + features->fma3 = IsBitSet(leaf_1.ecx, 12); + features->avx = IsBitSet(leaf_1.ecx, 28); + features->avx2 = IsBitSet(leaf_7.ebx, 5); + } + if (os_preserves->avx512_registers) { + features->avx512f = IsBitSet(leaf_7.ebx, 16); + features->avx512cd = IsBitSet(leaf_7.ebx, 28); + features->avx512er = IsBitSet(leaf_7.ebx, 27); + features->avx512pf = IsBitSet(leaf_7.ebx, 26); + features->avx512bw = IsBitSet(leaf_7.ebx, 30); + features->avx512dq = IsBitSet(leaf_7.ebx, 17); + features->avx512vl = IsBitSet(leaf_7.ebx, 31); + features->avx512ifma = IsBitSet(leaf_7.ebx, 21); + features->avx512vbmi = IsBitSet(leaf_7.ecx, 1); + features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6); + features->avx512vnni = IsBitSet(leaf_7.ecx, 11); + features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); + features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); + features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); + features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); + features->avx512_second_fma = HasSecondFMA(info->model); + features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3); + features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5); + features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8); + } + if (os_preserves->amx_registers) { + features->amx_bf16 = IsBitSet(leaf_7.edx, 22); + features->amx_tile = IsBitSet(leaf_7.edx, 24); + features->amx_int8 = IsBitSet(leaf_7.edx, 25); + } + } else { + // When XCR0 is not available (Atom based or older cpus) we need to defer to + // the OS via custom code. + DetectFeaturesFromOs(features); + // Now that we have queried the OS for SSE support, we report this back to + // os_preserves. This is needed in case of AMD CPU's to enable testing of + // sse4a (See ParseExtraAMDCpuId below). + if (features->sse) os_preserves->sse_registers = true; + } +} + +// Reference +// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented. +static Leaf GetLeafByIdAMD(uint32_t leaf_id) { + uint32_t max_extended = CpuId(0x80000000).eax; + return SafeCpuId(max_extended, leaf_id); +} + +static void ParseExtraAMDCpuId(X86Info* info, OsPreserves os_preserves) { + const Leaf leaf_80000001 = GetLeafByIdAMD(0x80000001); + + X86Features* const features = &info->features; + + if (os_preserves.sse_registers) { + features->sse4a = IsBitSet(leaf_80000001.ecx, 6); + } + + if (os_preserves.avx_registers) { + features->fma4 = IsBitSet(leaf_80000001.ecx, 16); + } +} + +static const X86Info kEmptyX86Info; +static const OsPreserves kEmptyOsPreserves; + +X86Info GetX86Info(void) { + X86Info info = kEmptyX86Info; + const Leaf leaf_0 = CpuId(0); + const bool is_intel = IsVendor(leaf_0, CPU_FEATURES_VENDOR_GENUINE_INTEL); + const bool is_amd = IsVendor(leaf_0, CPU_FEATURES_VENDOR_AUTHENTIC_AMD); + const bool is_hygon = IsVendor(leaf_0, CPU_FEATURES_VENDOR_HYGON_GENUINE); + SetVendor(leaf_0, info.vendor); + if (is_intel || is_amd || is_hygon) { + OsPreserves os_preserves = kEmptyOsPreserves; + const uint32_t max_cpuid_leaf = leaf_0.eax; + ParseCpuId(max_cpuid_leaf, &info, &os_preserves); + if (is_amd || is_hygon) { + ParseExtraAMDCpuId(&info, os_preserves); + } + } + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Microarchitecture +//////////////////////////////////////////////////////////////////////////////// + +#define CPUID(FAMILY, MODEL) ((((FAMILY)&0xFF) << 8) | ((MODEL)&0xFF)) + +X86Microarchitecture GetX86Microarchitecture(const X86Info* info) { + if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_GENUINE_INTEL)) { + switch (CPUID(info->family, info->model)) { + case CPUID(0x04, 0x01): + case CPUID(0x04, 0x02): + case CPUID(0x04, 0x03): + case CPUID(0x04, 0x04): + case CPUID(0x04, 0x05): + case CPUID(0x04, 0x07): + case CPUID(0x04, 0x08): + case CPUID(0x04, 0x09): + // https://en.wikichip.org/wiki/intel/microarchitectures/80486 + return INTEL_80486; + case CPUID(0x05, 0x01): + case CPUID(0x05, 0x02): + case CPUID(0x05, 0x04): + case CPUID(0x05, 0x07): + case CPUID(0x05, 0x08): + // https://en.wikichip.org/wiki/intel/microarchitectures/p5 + return INTEL_P5; + case CPUID(0x05, 0x09): + case CPUID(0x05, 0x0A): + // https://en.wikichip.org/wiki/intel/quark + return INTEL_LAKEMONT; + case CPUID(0x06, 0x1C): // Intel(R) Atom(TM) CPU 230 @ 1.60GHz + case CPUID(0x06, 0x35): + case CPUID(0x06, 0x36): + case CPUID(0x06, 0x70): // https://en.wikichip.org/wiki/intel/atom/230 + // https://en.wikipedia.org/wiki/Bonnell_(microarchitecture) + return INTEL_ATOM_BNL; + case CPUID(0x06, 0x37): + case CPUID(0x06, 0x4C): + // https://en.wikipedia.org/wiki/Silvermont + return INTEL_ATOM_SMT; + case CPUID(0x06, 0x5C): + // https://en.wikipedia.org/wiki/Goldmont + return INTEL_ATOM_GMT; + case CPUID(0x06, 0x0F): + case CPUID(0x06, 0x16): + // https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture) + return INTEL_CORE; + case CPUID(0x06, 0x17): + case CPUID(0x06, 0x1D): + // https://en.wikipedia.org/wiki/Penryn_(microarchitecture) + return INTEL_PNR; + case CPUID(0x06, 0x1A): + case CPUID(0x06, 0x1E): + case CPUID(0x06, 0x1F): + case CPUID(0x06, 0x2E): + // https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) + return INTEL_NHM; + case CPUID(0x06, 0x25): + case CPUID(0x06, 0x2C): + case CPUID(0x06, 0x2F): + // https://en.wikipedia.org/wiki/Westmere_(microarchitecture) + return INTEL_WSM; + case CPUID(0x06, 0x2A): + case CPUID(0x06, 0x2D): + // https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings + return INTEL_SNB; + case CPUID(0x06, 0x3A): + case CPUID(0x06, 0x3E): + // https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings + return INTEL_IVB; + case CPUID(0x06, 0x3C): + case CPUID(0x06, 0x3F): + case CPUID(0x06, 0x45): + case CPUID(0x06, 0x46): + // https://en.wikipedia.org/wiki/Haswell_(microarchitecture) + return INTEL_HSW; + case CPUID(0x06, 0x3D): + case CPUID(0x06, 0x47): + case CPUID(0x06, 0x4F): + case CPUID(0x06, 0x56): + // https://en.wikipedia.org/wiki/Broadwell_(microarchitecture) + return INTEL_BDW; + case CPUID(0x06, 0x4E): + case CPUID(0x06, 0x55): + case CPUID(0x06, 0x5E): + // https://en.wikipedia.org/wiki/Skylake_(microarchitecture) + return INTEL_SKL; + case CPUID(0x06, 0x66): + // https://en.wikipedia.org/wiki/Cannon_Lake_(microarchitecture) + return INTEL_CNL; + case CPUID(0x06, 0x7D): // client + case CPUID(0x06, 0x7E): // client + case CPUID(0x06, 0x9D): // NNP-I + case CPUID(0x06, 0x6A): // server + case CPUID(0x06, 0x6C): // server + // https://en.wikipedia.org/wiki/Ice_Lake_(microprocessor) + return INTEL_ICL; + case CPUID(0x06, 0x8C): + case CPUID(0x06, 0x8D): + // https://en.wikipedia.org/wiki/Tiger_Lake_(microarchitecture) + return INTEL_TGL; + case CPUID(0x06, 0x8F): + // https://en.wikipedia.org/wiki/Sapphire_Rapids + return INTEL_SPR; + case CPUID(0x06, 0x8E): + switch (info->stepping) { + case 9: + return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake + case 10: + return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake + case 11: + return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture) + default: + return X86_UNKNOWN; + } + case CPUID(0x06, 0x9E): + if (info->stepping > 9) { + // https://en.wikipedia.org/wiki/Coffee_Lake + return INTEL_CFL; + } else { + // https://en.wikipedia.org/wiki/Kaby_Lake + return INTEL_KBL; + } + case CPUID(0x06, 0x97): + case CPUID(0x06, 0x9A): + // https://en.wikichip.org/wiki/intel/microarchitectures/alder_lake + return INTEL_ADL; + case CPUID(0x06, 0xA7): + // https://en.wikichip.org/wiki/intel/microarchitectures/rocket_lake + return INTEL_RCL; + case CPUID(0x06, 0x85): + // https://en.wikichip.org/wiki/intel/microarchitectures/knights_mill + return INTEL_KNIGHTS_M; + case CPUID(0x06, 0x57): + // https://en.wikichip.org/wiki/intel/microarchitectures/knights_landing + return INTEL_KNIGHTS_L; + case CPUID(0x0B, 0x00): + // https://en.wikichip.org/wiki/intel/microarchitectures/knights_ferry + return INTEL_KNIGHTS_F; + case CPUID(0x0B, 0x01): + // https://en.wikichip.org/wiki/intel/microarchitectures/knights_corner + return INTEL_KNIGHTS_C; + case CPUID(0x0F, 0x01): + case CPUID(0x0F, 0x02): + case CPUID(0x0F, 0x03): + case CPUID(0x0F, 0x04): + case CPUID(0x0F, 0x06): + // https://en.wikichip.org/wiki/intel/microarchitectures/netburst + return INTEL_NETBURST; + default: + return X86_UNKNOWN; + } + } + if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_AUTHENTIC_AMD)) { + switch (CPUID(info->family, info->model)) { + // https://en.wikichip.org/wiki/amd/cpuid + case CPUID(0xF, 0x04): + case CPUID(0xF, 0x05): + case CPUID(0xF, 0x07): + case CPUID(0xF, 0x08): + case CPUID(0xF, 0x0C): + case CPUID(0xF, 0x0E): + case CPUID(0xF, 0x0F): + case CPUID(0xF, 0x14): + case CPUID(0xF, 0x15): + case CPUID(0xF, 0x17): + case CPUID(0xF, 0x18): + case CPUID(0xF, 0x1B): + case CPUID(0xF, 0x1C): + case CPUID(0xF, 0x1F): + case CPUID(0xF, 0x21): + case CPUID(0xF, 0x23): + case CPUID(0xF, 0x24): + case CPUID(0xF, 0x25): + case CPUID(0xF, 0x27): + case CPUID(0xF, 0x2B): + case CPUID(0xF, 0x2C): + case CPUID(0xF, 0x2F): + case CPUID(0xF, 0x41): + case CPUID(0xF, 0x43): + case CPUID(0xF, 0x48): + case CPUID(0xF, 0x4B): + case CPUID(0xF, 0x4C): + case CPUID(0xF, 0x4F): + case CPUID(0xF, 0x5D): + case CPUID(0xF, 0x5F): + case CPUID(0xF, 0x68): + case CPUID(0xF, 0x6B): + case CPUID(0xF, 0x6F): + case CPUID(0xF, 0x7F): + case CPUID(0xF, 0xC1): + return AMD_HAMMER; + case CPUID(0x10, 0x02): + case CPUID(0x10, 0x04): + case CPUID(0x10, 0x05): + case CPUID(0x10, 0x06): + case CPUID(0x10, 0x08): + case CPUID(0x10, 0x09): + case CPUID(0x10, 0x0A): + return AMD_K10; + case CPUID(0x11, 0x03): + // http://developer.amd.com/wordpress/media/2012/10/41788.pdf + return AMD_K11; + case CPUID(0x12, 0x01): + // https://www.amd.com/system/files/TechDocs/44739_12h_Rev_Gd.pdf + return AMD_K12; + case CPUID(0x14, 0x00): + case CPUID(0x14, 0x01): + case CPUID(0x14, 0x02): + // https://www.amd.com/system/files/TechDocs/47534_14h_Mod_00h-0Fh_Rev_Guide.pdf + return AMD_BOBCAT; + case CPUID(0x15, 0x01): + // https://en.wikichip.org/wiki/amd/microarchitectures/bulldozer + return AMD_BULLDOZER; + case CPUID(0x15, 0x02): + case CPUID(0x15, 0x11): + case CPUID(0x15, 0x13): + // https://en.wikichip.org/wiki/amd/microarchitectures/piledriver + return AMD_PILEDRIVER; + case CPUID(0x15, 0x30): + case CPUID(0x15, 0x38): + // https://en.wikichip.org/wiki/amd/microarchitectures/steamroller + return AMD_STREAMROLLER; + case CPUID(0x15, 0x60): + case CPUID(0x15, 0x65): + case CPUID(0x15, 0x70): + // https://en.wikichip.org/wiki/amd/microarchitectures/excavator + return AMD_EXCAVATOR; + case CPUID(0x16, 0x00): + return AMD_JAGUAR; + case CPUID(0x16, 0x30): + return AMD_PUMA; + case CPUID(0x17, 0x01): + case CPUID(0x17, 0x11): + case CPUID(0x17, 0x18): + case CPUID(0x17, 0x20): + // https://en.wikichip.org/wiki/amd/microarchitectures/zen + return AMD_ZEN; + case CPUID(0x17, 0x08): + // https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B + return AMD_ZEN_PLUS; + case CPUID(0x17, 0x31): + case CPUID(0x17, 0x47): + case CPUID(0x17, 0x60): + case CPUID(0x17, 0x68): + case CPUID(0x17, 0x71): + case CPUID(0x17, 0x90): + case CPUID(0x17, 0x98): + // https://en.wikichip.org/wiki/amd/microarchitectures/zen_2 + return AMD_ZEN2; + case CPUID(0x19, 0x01): + case CPUID(0x19, 0x21): + case CPUID(0x19, 0x30): + case CPUID(0x19, 0x40): + case CPUID(0x19, 0x50): + // https://en.wikichip.org/wiki/amd/microarchitectures/zen_3 + return AMD_ZEN3; + default: + return X86_UNKNOWN; + } + } + if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_HYGON_GENUINE)) { + switch (CPUID(info->family, info->model)) { + case CPUID(0x18, 0x00): + return AMD_ZEN; + } + } + return X86_UNKNOWN; +} + +//////////////////////////////////////////////////////////////////////////////// +// CacheInfo +//////////////////////////////////////////////////////////////////////////////// + +static const CacheLevelInfo kEmptyCacheLevelInfo; + +static CacheLevelInfo GetCacheLevelInfo(const uint32_t reg) { + const int UNDEF = -1; + const int KiB = 1024; + const int MiB = 1024 * KiB; + switch (reg) { + case 0x01: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 32, + .partitioning = 0}; + case 0x02: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * MiB, + .ways = 0xFF, + .line_size = UNDEF, + .tlb_entries = 2, + .partitioning = 0}; + case 0x03: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 64, + .partitioning = 0}; + case 0x04: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 8, + .partitioning = 0}; + case 0x05: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 32, + .partitioning = 0}; + case 0x06: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, + .cache_size = 8 * KiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x08: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, + .cache_size = 16 * KiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x09: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, + .cache_size = 32 * KiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x0A: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 8 * KiB, + .ways = 2, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x0B: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 4, + .partitioning = 0}; + case 0x0C: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 16 * KiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x0D: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 16 * KiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x0E: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 24 * KiB, + .ways = 6, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x1D: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 128 * KiB, + .ways = 2, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x21: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 256 * KiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x22: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 512 * KiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 2}; + case 0x23: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 2}; + case 0x24: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 16, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x25: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 2 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 2}; + case 0x29: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 4 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 2}; + case 0x2C: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 32 * KiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x30: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, + .cache_size = 32 * KiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x40: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = UNDEF, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x41: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 128 * KiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x42: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 256 * KiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x43: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 512 * KiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x44: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x45: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 2 * MiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x46: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 4 * MiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x47: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 8 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x48: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 3 * MiB, + .ways = 12, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x49: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 4 * MiB, + .ways = 16, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case (0x49 | (1 << 8)): + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 4 * MiB, + .ways = 16, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x4A: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 6 * MiB, + .ways = 12, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x4B: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 8 * MiB, + .ways = 16, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x4C: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 12 * MiB, + .ways = 12, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x4D: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 16 * MiB, + .ways = 16, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x4E: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 6 * MiB, + .ways = 24, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x4F: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = 32, + .partitioning = 0}; + case 0x50: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = 64, + .partitioning = 0}; + case 0x51: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = 128, + .partitioning = 0}; + case 0x52: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = 256, + .partitioning = 0}; + case 0x55: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 2 * MiB, + .ways = 0xFF, + .line_size = UNDEF, + .tlb_entries = 7, + .partitioning = 0}; + case 0x56: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 16, + .partitioning = 0}; + case 0x57: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 16, + .partitioning = 0}; + case 0x59: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 0xFF, + .line_size = UNDEF, + .tlb_entries = 16, + .partitioning = 0}; + case 0x5A: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 2 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 32, + .partitioning = 0}; + case 0x5B: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = 64, + .partitioning = 0}; + case 0x5C: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = 128, + .partitioning = 0}; + case 0x5D: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = 256, + .partitioning = 0}; + case 0x60: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 16 * KiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x61: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 0xFF, + .line_size = UNDEF, + .tlb_entries = 48, + .partitioning = 0}; + case 0x63: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 2 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 4, + .partitioning = 0}; + case 0x66: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 8 * KiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x67: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 16 * KiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x68: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 32 * KiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x70: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, + .cache_size = 12 * KiB, + .ways = 8, + .line_size = UNDEF, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x71: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, + .cache_size = 16 * KiB, + .ways = 8, + .line_size = UNDEF, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x72: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, + .cache_size = 32 * KiB, + .ways = 8, + .line_size = UNDEF, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x76: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 2 * MiB, + .ways = 0xFF, + .line_size = UNDEF, + .tlb_entries = 8, + .partitioning = 0}; + case 0x78: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x79: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 128 * KiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 2}; + case 0x7A: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 256 * KiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 2}; + case 0x7B: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 512 * KiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 2}; + case 0x7C: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 2}; + case 0x7D: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 2 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x7F: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 512 * KiB, + .ways = 2, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x80: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 512 * KiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x82: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 256 * KiB, + .ways = 8, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x83: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 512 * KiB, + .ways = 8, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x84: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 8, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x85: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 2 * MiB, + .ways = 8, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x86: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 512 * KiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x87: + return (CacheLevelInfo){.level = 2, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xA0: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_DTLB, + .cache_size = 4 * KiB, + .ways = 0xFF, + .line_size = UNDEF, + .tlb_entries = 32, + .partitioning = 0}; + case 0xB0: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 128, + .partitioning = 0}; + case 0xB1: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 2 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 8, + .partitioning = 0}; + case 0xB2: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 64, + .partitioning = 0}; + case 0xB3: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 128, + .partitioning = 0}; + case 0xB4: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 256, + .partitioning = 0}; + case 0xB5: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 8, + .line_size = UNDEF, + .tlb_entries = 64, + .partitioning = 0}; + case 0xB6: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 8, + .line_size = UNDEF, + .tlb_entries = 128, + .partitioning = 0}; + case 0xBA: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 64, + .partitioning = 0}; + case 0xC0: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 8, + .partitioning = 0}; + case 0xC1: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_STLB, + .cache_size = 4 * KiB, + .ways = 8, + .line_size = UNDEF, + .tlb_entries = 1024, + .partitioning = 0}; + case 0xC2: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_DTLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 16, + .partitioning = 0}; + case 0xC3: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_STLB, + .cache_size = 4 * KiB, + .ways = 6, + .line_size = UNDEF, + .tlb_entries = 1536, + .partitioning = 0}; + case 0xCA: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_STLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 512, + .partitioning = 0}; + case 0xD0: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 512 * KiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xD1: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xD2: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 2 * MiB, + .ways = 4, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xD6: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xD7: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 2 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xD8: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 4 * MiB, + .ways = 8, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xDC: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 1 * 1536 * KiB, + .ways = 12, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xDD: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 3 * MiB, + .ways = 12, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xDE: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 6 * MiB, + .ways = 12, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xE2: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 2 * MiB, + .ways = 16, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xE3: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 4 * MiB, + .ways = 16, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xE4: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 8 * MiB, + .ways = 16, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xEA: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 12 * MiB, + .ways = 24, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xEB: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 18 * MiB, + .ways = 24, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xEC: + return (CacheLevelInfo){.level = 3, + .cache_type = CPU_FEATURE_CACHE_DATA, + .cache_size = 24 * MiB, + .ways = 24, + .line_size = 64, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xF0: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_PREFETCH, + .cache_size = 64 * KiB, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xF1: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_PREFETCH, + .cache_size = 128 * KiB, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0xFF: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_NULL, + .cache_size = UNDEF, + .ways = UNDEF, + .line_size = UNDEF, + .tlb_entries = UNDEF, + .partitioning = 0}; + default: + return kEmptyCacheLevelInfo; + } +} + +// From https://www.felixcloutier.com/x86/cpuid#tbl-3-12 +static void ParseLeaf2(const int max_cpuid_leaf, CacheInfo* info) { + Leaf leaf = SafeCpuId(max_cpuid_leaf, 2); + // The least-significant byte in register EAX (register AL) will always return + // 01H. Software should ignore this value and not interpret it as an + // informational descriptor. + leaf.eax &= 0xFFFFFF00; // Zeroing out AL. 0 is the empty descriptor. + // The most significant bit (bit 31) of each register indicates whether the + // register contains valid information (set to 0) or is reserved (set to 1). + if (IsBitSet(leaf.eax, 31)) leaf.eax = 0; + if (IsBitSet(leaf.ebx, 31)) leaf.ebx = 0; + if (IsBitSet(leaf.ecx, 31)) leaf.ecx = 0; + if (IsBitSet(leaf.edx, 31)) leaf.edx = 0; + + uint8_t data[16]; +#if __STDC_VERSION__ >= 201112L + _Static_assert(sizeof(Leaf) == sizeof(data), "Leaf must be 16 bytes"); +#endif + copy((char*)(data), (const char*)(&leaf), sizeof(data)); + for (size_t i = 0; i < sizeof(data); ++i) { + const uint8_t descriptor = data[i]; + if (descriptor == 0) continue; + info->levels[info->size] = GetCacheLevelInfo(descriptor); + info->size++; + } +} + +static const CacheInfo kEmptyCacheInfo; + +// For newer Intel CPUs uses "CPUID, eax=0x00000004". +// https://www.felixcloutier.com/x86/cpuid#input-eax-=-04h--returns-deterministic-cache-parameters-for-each-level +// For newer AMD CPUs uses "CPUID, eax=0x8000001D" +static void ParseCacheInfo(const int max_cpuid_leaf, uint32_t leaf_id, + CacheInfo* old_info) { + CacheInfo info = kEmptyCacheInfo; + for (int index = 0; info.size < CPU_FEATURES_MAX_CACHE_LEVEL; ++index) { + const Leaf leaf = SafeCpuIdEx(max_cpuid_leaf, leaf_id, index); + int cache_type_field = ExtractBitRange(leaf.eax, 4, 0); + CacheType cache_type; + if (cache_type_field == 0) + break; + else if (cache_type_field == 1) + cache_type = CPU_FEATURE_CACHE_DATA; + else if (cache_type_field == 2) + cache_type = CPU_FEATURE_CACHE_INSTRUCTION; + else if (cache_type_field == 3) + cache_type = CPU_FEATURE_CACHE_UNIFIED; + else + break; // Should not occur as per documentation. + int level = ExtractBitRange(leaf.eax, 7, 5); + int line_size = ExtractBitRange(leaf.ebx, 11, 0) + 1; + int partitioning = ExtractBitRange(leaf.ebx, 21, 12) + 1; + int ways = ExtractBitRange(leaf.ebx, 31, 22) + 1; + int tlb_entries = leaf.ecx + 1; + int cache_size = ways * partitioning * line_size * tlb_entries; + info.levels[info.size] = (CacheLevelInfo){.level = level, + .cache_type = cache_type, + .cache_size = cache_size, + .ways = ways, + .line_size = line_size, + .tlb_entries = tlb_entries, + .partitioning = partitioning}; + ++info.size; + } + // Override CacheInfo if we successfully extracted Deterministic Cache + // Parameters. + if (info.size > 0) *old_info = info; +} + +CacheInfo GetX86CacheInfo(void) { + CacheInfo info = kEmptyCacheInfo; + const Leaf leaf_0 = CpuId(0); + if (IsVendor(leaf_0, CPU_FEATURES_VENDOR_GENUINE_INTEL)) { + ParseLeaf2(leaf_0.eax, &info); + ParseCacheInfo(leaf_0.eax, 4, &info); + } else if (IsVendor(leaf_0, CPU_FEATURES_VENDOR_AUTHENTIC_AMD) || + IsVendor(leaf_0, CPU_FEATURES_VENDOR_HYGON_GENUINE)) { + const uint32_t max_ext = CpuId(0x80000000).eax; + const uint32_t cpuid_ext = SafeCpuId(max_ext, 0x80000001).ecx; + + // If CPUID Fn8000_0001_ECX[TopologyExtensions]==0 + // then CPUID Fn8000_0001_E[D,C,B,A]X is reserved. + // https://www.amd.com/system/files/TechDocs/25481.pdf + if (IsBitSet(cpuid_ext, 22)) { + ParseCacheInfo(max_ext, 0x8000001D, &info); + } + } + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. +//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(X86_FPU, fpu, , , ) \ + LINE(X86_TSC, tsc, , , ) \ + LINE(X86_CX8, cx8, , , ) \ + LINE(X86_CLFSH, clfsh, , , ) \ + LINE(X86_MMX, mmx, , , ) \ + LINE(X86_AES, aes, , , ) \ + LINE(X86_ERMS, erms, , , ) \ + LINE(X86_F16C, f16c, , , ) \ + LINE(X86_FMA4, fma4, , , ) \ + LINE(X86_FMA3, fma3, , , ) \ + LINE(X86_VAES, vaes, , , ) \ + LINE(X86_VPCLMULQDQ, vpclmulqdq, , , ) \ + LINE(X86_BMI1, bmi1, , , ) \ + LINE(X86_HLE, hle, , , ) \ + LINE(X86_BMI2, bmi2, , , ) \ + LINE(X86_RTM, rtm, , , ) \ + LINE(X86_RDSEED, rdseed, , , ) \ + LINE(X86_CLFLUSHOPT, clflushopt, , , ) \ + LINE(X86_CLWB, clwb, , , ) \ + LINE(X86_SSE, sse, , , ) \ + LINE(X86_SSE2, sse2, , , ) \ + LINE(X86_SSE3, sse3, , , ) \ + LINE(X86_SSSE3, ssse3, , , ) \ + LINE(X86_SSE4_1, sse4_1, , , ) \ + LINE(X86_SSE4_2, sse4_2, , , ) \ + LINE(X86_SSE4A, sse4a, , , ) \ + LINE(X86_AVX, avx, , , ) \ + LINE(X86_AVX2, avx2, , , ) \ + LINE(X86_AVX512F, avx512f, , , ) \ + LINE(X86_AVX512CD, avx512cd, , , ) \ + LINE(X86_AVX512ER, avx512er, , , ) \ + LINE(X86_AVX512PF, avx512pf, , , ) \ + LINE(X86_AVX512BW, avx512bw, , , ) \ + LINE(X86_AVX512DQ, avx512dq, , , ) \ + LINE(X86_AVX512VL, avx512vl, , , ) \ + LINE(X86_AVX512IFMA, avx512ifma, , , ) \ + LINE(X86_AVX512VBMI, avx512vbmi, , , ) \ + LINE(X86_AVX512VBMI2, avx512vbmi2, , , ) \ + LINE(X86_AVX512VNNI, avx512vnni, , , ) \ + LINE(X86_AVX512BITALG, avx512bitalg, , , ) \ + LINE(X86_AVX512VPOPCNTDQ, avx512vpopcntdq, , , ) \ + LINE(X86_AVX512_4VNNIW, avx512_4vnniw, , , ) \ + LINE(X86_AVX512_4VBMI2, avx512_4vbmi2, , , ) \ + LINE(X86_AVX512_SECOND_FMA, avx512_second_fma, , , ) \ + LINE(X86_AVX512_4FMAPS, avx512_4fmaps, , , ) \ + LINE(X86_AVX512_BF16, avx512_bf16, , , ) \ + LINE(X86_AVX512_VP2INTERSECT, avx512_vp2intersect, , , ) \ + LINE(X86_AMX_BF16, amx_bf16, , , ) \ + LINE(X86_AMX_TILE, amx_tile, , , ) \ + LINE(X86_AMX_INT8, amx_int8, , , ) \ + LINE(X86_PCLMULQDQ, pclmulqdq, , , ) \ + LINE(X86_SMX, smx, , , ) \ + LINE(X86_SGX, sgx, , , ) \ + LINE(X86_CX16, cx16, , , ) \ + LINE(X86_SHA, sha, , , ) \ + LINE(X86_POPCNT, popcnt, , , ) \ + LINE(X86_MOVBE, movbe, , , ) \ + LINE(X86_RDRND, rdrnd, , , ) \ + LINE(X86_DCA, dca, , , ) \ + LINE(X86_SS, ss, , , ) \ + LINE(X86_ADX, adx, , , ) +#define INTROSPECTION_PREFIX X86 +#define INTROSPECTION_ENUM_PREFIX X86 +#include "define_introspection.inl" + +#define X86_MICROARCHITECTURE_NAMES \ + LINE(X86_UNKNOWN) \ + LINE(INTEL_80486) \ + LINE(INTEL_P5) \ + LINE(INTEL_LAKEMONT) \ + LINE(INTEL_CORE) \ + LINE(INTEL_PNR) \ + LINE(INTEL_NHM) \ + LINE(INTEL_ATOM_BNL) \ + LINE(INTEL_WSM) \ + LINE(INTEL_SNB) \ + LINE(INTEL_IVB) \ + LINE(INTEL_ATOM_SMT) \ + LINE(INTEL_HSW) \ + LINE(INTEL_BDW) \ + LINE(INTEL_SKL) \ + LINE(INTEL_ATOM_GMT) \ + LINE(INTEL_KBL) \ + LINE(INTEL_CFL) \ + LINE(INTEL_WHL) \ + LINE(INTEL_CNL) \ + LINE(INTEL_ICL) \ + LINE(INTEL_TGL) \ + LINE(INTEL_SPR) \ + LINE(INTEL_ADL) \ + LINE(INTEL_RCL) \ + LINE(INTEL_KNIGHTS_M) \ + LINE(INTEL_KNIGHTS_L) \ + LINE(INTEL_KNIGHTS_F) \ + LINE(INTEL_KNIGHTS_C) \ + LINE(INTEL_NETBURST) \ + LINE(AMD_HAMMER) \ + LINE(AMD_K10) \ + LINE(AMD_K11) \ + LINE(AMD_K12) \ + LINE(AMD_BOBCAT) \ + LINE(AMD_PILEDRIVER) \ + LINE(AMD_STREAMROLLER) \ + LINE(AMD_EXCAVATOR) \ + LINE(AMD_BULLDOZER) \ + LINE(AMD_JAGUAR) \ + LINE(AMD_PUMA) \ + LINE(AMD_ZEN) \ + LINE(AMD_ZEN_PLUS) \ + LINE(AMD_ZEN2) \ + LINE(AMD_ZEN3) + +const char* GetX86MicroarchitectureName(X86Microarchitecture value) { +#define LINE(ENUM) [ENUM] = STRINGIZE(ENUM), + static const char* kMicroarchitectureNames[] = {X86_MICROARCHITECTURE_NAMES}; +#undef LINE + if (value >= X86_MICROARCHITECTURE_LAST_) return "unknown microarchitecture"; + return kMicroarchitectureNames[value]; +} diff --git a/cpu_features/src/impl_x86_freebsd.c b/cpu_features/src/impl_x86_freebsd.c new file mode 100644 index 0000000..0777e29 --- /dev/null +++ b/cpu_features/src/impl_x86_freebsd.c @@ -0,0 +1,67 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_X86 +#ifdef CPU_FEATURES_OS_FREEBSD + +#include "impl_x86__base_implementation.inl" + +static void OverrideOsPreserves(OsPreserves* os_preserves) { + (void)os_preserves; + // No override +} + +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +static void DetectFeaturesFromOs(X86Features* features) { + // Handling FreeBSD platform through parsing /var/run/dmesg.boot. + const int fd = CpuFeatures_OpenFile("/var/run/dmesg.boot"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (bool stop = false; !stop;) { + const LineResult result = StackLineReader_NextLine(&reader); + if (result.eof) stop = true; + const StringView line = result.line; + if (!CpuFeatures_StringView_StartsWith(line, str(" Features"))) continue; + // Lines of interests are of the following form: + // " Features=0x1783fbff" + // We first extract the comma separated values between angle brackets. + StringView csv = result.line; + int index = CpuFeatures_StringView_IndexOfChar(csv, '<'); + if (index >= 0) csv = CpuFeatures_StringView_PopFront(csv, index + 1); + if (csv.size > 0 && CpuFeatures_StringView_Back(csv) == '>') + csv = CpuFeatures_StringView_PopBack(csv, 1); + if (CpuFeatures_StringView_HasWord(csv, "SSE", ',')) features->sse = true; + if (CpuFeatures_StringView_HasWord(csv, "SSE2", ',')) + features->sse2 = true; + if (CpuFeatures_StringView_HasWord(csv, "SSE3", ',')) + features->sse3 = true; + if (CpuFeatures_StringView_HasWord(csv, "SSSE3", ',')) + features->ssse3 = true; + if (CpuFeatures_StringView_HasWord(csv, "SSE4.1", ',')) + features->sse4_1 = true; + if (CpuFeatures_StringView_HasWord(csv, "SSE4.2", ',')) + features->sse4_2 = true; + } + CpuFeatures_CloseFile(fd); + } +} + +#endif // CPU_FEATURES_OS_FREEBSD +#endif // CPU_FEATURES_ARCH_X86 diff --git a/cpu_features/src/impl_x86_linux_or_android.c b/cpu_features/src/impl_x86_linux_or_android.c new file mode 100644 index 0000000..9c9f49a --- /dev/null +++ b/cpu_features/src/impl_x86_linux_or_android.c @@ -0,0 +1,57 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_X86 +#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + +#include "impl_x86__base_implementation.inl" + +static void OverrideOsPreserves(OsPreserves* os_preserves) { + (void)os_preserves; + // No override +} + +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" +static void DetectFeaturesFromOs(X86Features* features) { + // Handling Linux platform through /proc/cpuinfo. + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (bool stop = false; !stop;) { + const LineResult result = StackLineReader_NextLine(&reader); + if (result.eof) stop = true; + const StringView line = result.line; + StringView key, value; + if (!CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) + continue; + if (!CpuFeatures_StringView_IsEquals(key, str("flags"))) continue; + features->sse = CpuFeatures_StringView_HasWord(value, "sse", ' '); + features->sse2 = CpuFeatures_StringView_HasWord(value, "sse2", ' '); + features->sse3 = CpuFeatures_StringView_HasWord(value, "sse3", ' '); + features->ssse3 = CpuFeatures_StringView_HasWord(value, "ssse3", ' '); + features->sse4_1 = CpuFeatures_StringView_HasWord(value, "sse4_1", ' '); + features->sse4_2 = CpuFeatures_StringView_HasWord(value, "sse4_2", ' '); + break; + } + CpuFeatures_CloseFile(fd); + } +} + +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_X86 diff --git a/cpu_features/src/impl_x86_macos.c b/cpu_features/src/impl_x86_macos.c new file mode 100644 index 0000000..6133686 --- /dev/null +++ b/cpu_features/src/impl_x86_macos.c @@ -0,0 +1,56 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_X86 +#ifdef CPU_FEATURES_OS_MACOS + +#include "impl_x86__base_implementation.inl" + +#if !defined(HAVE_SYSCTLBYNAME) +#error "Darwin needs support for sysctlbyname" +#endif +#include + +#if defined(CPU_FEATURES_MOCK_CPUID_X86) +extern bool GetDarwinSysCtlByName(const char*); +#else // CPU_FEATURES_MOCK_CPUID_X86 +static bool GetDarwinSysCtlByName(const char* name) { + int enabled; + size_t enabled_len = sizeof(enabled); + const int failure = sysctlbyname(name, &enabled, &enabled_len, NULL, 0); + return failure ? false : enabled; +} +#endif + +static void OverrideOsPreserves(OsPreserves* os_preserves) { + // On Darwin AVX512 support is On-demand. + // We have to query the OS instead of querying the Zmm save/restore state. + // https://github.com/apple/darwin-xnu/blob/8f02f2a044b9bb1ad951987ef5bab20ec9486310/osfmk/i386/fpu.c#L173-L199 + os_preserves->avx512_registers = GetDarwinSysCtlByName("hw.optional.avx512f"); +} + +static void DetectFeaturesFromOs(X86Features* features) { + // Handling Darwin platform through sysctlbyname. + features->sse = GetDarwinSysCtlByName("hw.optional.sse"); + features->sse2 = GetDarwinSysCtlByName("hw.optional.sse2"); + features->sse3 = GetDarwinSysCtlByName("hw.optional.sse3"); + features->ssse3 = GetDarwinSysCtlByName("hw.optional.supplementalsse3"); + features->sse4_1 = GetDarwinSysCtlByName("hw.optional.sse4_1"); + features->sse4_2 = GetDarwinSysCtlByName("hw.optional.sse4_2"); +} + +#endif // CPU_FEATURES_OS_MACOS +#endif // CPU_FEATURES_ARCH_X86 diff --git a/cpu_features/src/impl_x86_windows.c b/cpu_features/src/impl_x86_windows.c new file mode 100644 index 0000000..e970539 --- /dev/null +++ b/cpu_features/src/impl_x86_windows.c @@ -0,0 +1,49 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_X86 +#ifdef CPU_FEATURES_OS_WINDOWS + +#include "impl_x86__base_implementation.inl" + +static void OverrideOsPreserves(OsPreserves* os_preserves) { + (void)os_preserves; + // No override +} + +#include // IsProcessorFeaturePresent + +#if defined(CPU_FEATURES_MOCK_CPUID_X86) +extern bool GetWindowsIsProcessorFeaturePresent(DWORD); +#else // CPU_FEATURES_MOCK_CPUID_X86 +static bool GetWindowsIsProcessorFeaturePresent(DWORD ProcessorFeature) { + return IsProcessorFeaturePresent(ProcessorFeature); +} +#endif + +static void DetectFeaturesFromOs(X86Features* features) { + // Handling Windows platform through IsProcessorFeaturePresent. + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + features->sse = + GetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); + features->sse2 = + GetWindowsIsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); + features->sse3 = + GetWindowsIsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE); +} + +#endif // CPU_FEATURES_OS_WINDOWS +#endif // CPU_FEATURES_ARCH_X86 diff --git a/cpu_features/src/stack_line_reader.c b/cpu_features/src/stack_line_reader.c new file mode 100644 index 0000000..ffc778d --- /dev/null +++ b/cpu_features/src/stack_line_reader.c @@ -0,0 +1,132 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/stack_line_reader.h" + +#include +#include +#include + +#include "internal/filesystem.h" + +void StackLineReader_Initialize(StackLineReader* reader, int fd) { + reader->view.ptr = reader->buffer; + reader->view.size = 0; + reader->skip_mode = false; + reader->fd = fd; +} + +// Replaces the content of buffer with bytes from the file. +static int LoadFullBuffer(StackLineReader* reader) { + const int read = CpuFeatures_ReadFile(reader->fd, reader->buffer, + STACK_LINE_READER_BUFFER_SIZE); + assert(read >= 0); + reader->view.ptr = reader->buffer; + reader->view.size = read; + return read; +} + +// Appends with bytes from the file to buffer, filling the remaining space. +static int LoadMore(StackLineReader* reader) { + char* const ptr = reader->buffer + reader->view.size; + const size_t size_to_read = STACK_LINE_READER_BUFFER_SIZE - reader->view.size; + const int read = CpuFeatures_ReadFile(reader->fd, ptr, size_to_read); + assert(read >= 0); + assert(read <= (int)size_to_read); + reader->view.size += read; + return read; +} + +static int IndexOfEol(StackLineReader* reader) { + return CpuFeatures_StringView_IndexOfChar(reader->view, '\n'); +} + +// Relocate buffer's pending bytes at the beginning of the array and fills the +// remaining space with bytes from the file. +static int BringToFrontAndLoadMore(StackLineReader* reader) { + if (reader->view.size && reader->view.ptr != reader->buffer) { + memmove(reader->buffer, reader->view.ptr, reader->view.size); + } + reader->view.ptr = reader->buffer; + return LoadMore(reader); +} + +// Loads chunks of buffer size from disks until it contains a newline character +// or end of file. +static void SkipToNextLine(StackLineReader* reader) { + for (;;) { + const int read = LoadFullBuffer(reader); + if (read == 0) { + break; + } else { + const int eol_index = IndexOfEol(reader); + if (eol_index >= 0) { + reader->view = + CpuFeatures_StringView_PopFront(reader->view, eol_index + 1); + break; + } + } + } +} + +static LineResult CreateLineResult(bool eof, bool full_line, StringView view) { + LineResult result; + result.eof = eof; + result.full_line = full_line; + result.line = view; + return result; +} + +// Helper methods to provide clearer semantic in StackLineReader_NextLine. +static LineResult CreateEOFLineResult(StringView view) { + return CreateLineResult(true, true, view); +} + +static LineResult CreateTruncatedLineResult(StringView view) { + return CreateLineResult(false, false, view); +} + +static LineResult CreateValidLineResult(StringView view) { + return CreateLineResult(false, true, view); +} + +LineResult StackLineReader_NextLine(StackLineReader* reader) { + if (reader->skip_mode) { + SkipToNextLine(reader); + reader->skip_mode = false; + } + { + const bool can_load_more = + reader->view.size < STACK_LINE_READER_BUFFER_SIZE; + int eol_index = IndexOfEol(reader); + if (eol_index < 0 && can_load_more) { + const int read = BringToFrontAndLoadMore(reader); + if (read == 0) { + return CreateEOFLineResult(reader->view); + } + eol_index = IndexOfEol(reader); + } + if (eol_index < 0) { + reader->skip_mode = true; + return CreateTruncatedLineResult(reader->view); + } + { + StringView line = + CpuFeatures_StringView_KeepFront(reader->view, eol_index); + reader->view = + CpuFeatures_StringView_PopFront(reader->view, eol_index + 1); + return CreateValidLineResult(line); + } + } +} diff --git a/cpu_features/src/string_view.c b/cpu_features/src/string_view.c new file mode 100644 index 0000000..2cac9da --- /dev/null +++ b/cpu_features/src/string_view.c @@ -0,0 +1,192 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/string_view.h" + +#include +#include + +#include "copy.inl" +#include "equals.inl" + +static const char* CpuFeatures_memchr(const char* const ptr, const size_t size, + const char c) { + for (size_t i = 0; ptr && ptr[i] != '\0' && i < size; ++i) + if (ptr[i] == c) return ptr + i; + return NULL; +} + +int CpuFeatures_StringView_IndexOfChar(const StringView view, char c) { + if (view.ptr && view.size) { + const char* const found = CpuFeatures_memchr(view.ptr, view.size, c); + if (found) { + return (int)(found - view.ptr); + } + } + return -1; +} + +int CpuFeatures_StringView_IndexOf(const StringView view, + const StringView sub_view) { + if (sub_view.size) { + StringView remainder = view; + while (remainder.size >= sub_view.size) { + const int found_index = + CpuFeatures_StringView_IndexOfChar(remainder, sub_view.ptr[0]); + if (found_index < 0) break; + remainder = CpuFeatures_StringView_PopFront(remainder, found_index); + if (CpuFeatures_StringView_StartsWith(remainder, sub_view)) { + return (int)(remainder.ptr - view.ptr); + } + remainder = CpuFeatures_StringView_PopFront(remainder, 1); + } + } + return -1; +} + +bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b) { + if (a.size == b.size) { + return a.ptr == b.ptr || equals(a.ptr, b.ptr, b.size); + } + return false; +} + +bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b) { + return a.ptr && b.ptr && b.size && a.size >= b.size + ? equals(a.ptr, b.ptr, b.size) + : false; +} + +StringView CpuFeatures_StringView_PopFront(const StringView str_view, + size_t count) { + if (count > str_view.size) { + return kEmptyStringView; + } + return view(str_view.ptr + count, str_view.size - count); +} + +StringView CpuFeatures_StringView_PopBack(const StringView str_view, + size_t count) { + if (count > str_view.size) { + return kEmptyStringView; + } + return view(str_view.ptr, str_view.size - count); +} + +StringView CpuFeatures_StringView_KeepFront(const StringView str_view, + size_t count) { + return count <= str_view.size ? view(str_view.ptr, count) : str_view; +} + +char CpuFeatures_StringView_Front(const StringView view) { + assert(view.size); + assert(view.ptr); + return view.ptr[0]; +} + +char CpuFeatures_StringView_Back(const StringView view) { + assert(view.size); + return view.ptr[view.size - 1]; +} + +StringView CpuFeatures_StringView_TrimWhitespace(StringView view) { + while (view.size && isspace(CpuFeatures_StringView_Front(view))) + view = CpuFeatures_StringView_PopFront(view, 1); + while (view.size && isspace(CpuFeatures_StringView_Back(view))) + view = CpuFeatures_StringView_PopBack(view, 1); + return view; +} + +static int HexValue(const char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; +} + +// Returns -1 if view contains non digits. +static int ParsePositiveNumberWithBase(const StringView view, int base) { + int result = 0; + StringView remainder = view; + for (; remainder.size; + remainder = CpuFeatures_StringView_PopFront(remainder, 1)) { + const int value = HexValue(CpuFeatures_StringView_Front(remainder)); + if (value < 0 || value >= base) return -1; + result = (result * base) + value; + } + return result; +} + +int CpuFeatures_StringView_ParsePositiveNumber(const StringView view) { + if (view.size) { + const StringView hex_prefix = str("0x"); + if (CpuFeatures_StringView_StartsWith(view, hex_prefix)) { + const StringView span_no_prefix = + CpuFeatures_StringView_PopFront(view, hex_prefix.size); + return ParsePositiveNumberWithBase(span_no_prefix, 16); + } + return ParsePositiveNumberWithBase(view, 10); + } + return -1; +} + +void CpuFeatures_StringView_CopyString(const StringView src, char* dst, + size_t dst_size) { + if (dst_size > 0) { + const size_t max_copy_size = dst_size - 1; + const size_t copy_size = + src.size > max_copy_size ? max_copy_size : src.size; + copy(dst, src.ptr, copy_size); + dst[copy_size] = '\0'; + } +} + +bool CpuFeatures_StringView_HasWord(const StringView line, + const char* const word_str, + const char separator) { + const StringView word = str(word_str); + StringView remainder = line; + for (;;) { + const int index_of_word = CpuFeatures_StringView_IndexOf(remainder, word); + if (index_of_word < 0) { + return false; + } else { + const StringView before = + CpuFeatures_StringView_KeepFront(line, index_of_word); + const StringView after = + CpuFeatures_StringView_PopFront(line, index_of_word + word.size); + const bool valid_before = + before.size == 0 || CpuFeatures_StringView_Back(before) == separator; + const bool valid_after = + after.size == 0 || CpuFeatures_StringView_Front(after) == separator; + if (valid_before && valid_after) return true; + remainder = + CpuFeatures_StringView_PopFront(remainder, index_of_word + word.size); + } + } + return false; +} + +bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line, + StringView* key, + StringView* value) { + const StringView sep = str(": "); + const int index_of_separator = CpuFeatures_StringView_IndexOf(line, sep); + if (index_of_separator < 0) return false; + *value = CpuFeatures_StringView_TrimWhitespace( + CpuFeatures_StringView_PopFront(line, index_of_separator + sep.size)); + *key = CpuFeatures_StringView_TrimWhitespace( + CpuFeatures_StringView_KeepFront(line, index_of_separator)); + return true; +} diff --git a/cpu_features/src/utils/list_cpu_features.c b/cpu_features/src/utils/list_cpu_features.c new file mode 100644 index 0000000..4389f20 --- /dev/null +++ b/cpu_features/src/utils/list_cpu_features.c @@ -0,0 +1,439 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This program dumps current host data to the standard output. +// Output can be text or json if the `--json` flag is passed. + +#include +#include +#include +#include +#include +#include +#include + +#include "cpu_features_macros.h" + +#if defined(CPU_FEATURES_ARCH_X86) +#include "cpuinfo_x86.h" +#elif defined(CPU_FEATURES_ARCH_ARM) +#include "cpuinfo_arm.h" +#elif defined(CPU_FEATURES_ARCH_AARCH64) +#include "cpuinfo_aarch64.h" +#elif defined(CPU_FEATURES_ARCH_MIPS) +#include "cpuinfo_mips.h" +#elif defined(CPU_FEATURES_ARCH_PPC) +#include "cpuinfo_ppc.h" +#endif + +// Design principles +// ----------------- +// We build a tree structure containing all the data to be displayed. +// Then depending on the output type (text or json) we walk the tree and display +// the data accordingly. + +// We use a bump allocator to allocate strings and nodes of the tree, +// Memory is not intended to be reclaimed. +typedef struct { + char* ptr; + size_t size; +} BumpAllocator; + +char gGlobalBuffer[64 * 1024]; +BumpAllocator gBumpAllocator = {.ptr = gGlobalBuffer, + .size = sizeof(gGlobalBuffer)}; + +static void internal_error() { + fputs("internal error\n", stderr); + exit(EXIT_FAILURE); +} + +#define ALIGN 8 + +static void assertAligned() { + if ((uintptr_t)(gBumpAllocator.ptr) % ALIGN) internal_error(); +} + +static void BA_Align() { + while (gBumpAllocator.size && (uintptr_t)(gBumpAllocator.ptr) % ALIGN) { + --gBumpAllocator.size; + ++gBumpAllocator.ptr; + } + assertAligned(); +} + +// Update the available memory left in the BumpAllocator. +static void* BA_Bump(size_t size) { + assertAligned(); + // Align size to next 8B boundary. + size = (size + ALIGN - 1) / ALIGN * ALIGN; + if (gBumpAllocator.size < size) internal_error(); + void* ptr = gBumpAllocator.ptr; + gBumpAllocator.size -= size; + gBumpAllocator.ptr += size; + return ptr; +} + +// The type of the nodes in the tree. +typedef enum { + NT_INVALID, + NT_INT, + NT_MAP, + NT_MAP_ENTRY, + NT_ARRAY, + NT_ARRAY_ELEMENT, + NT_STRING, +} NodeType; + +// The node in the tree. +typedef struct Node { + NodeType type; + unsigned integer; + const char* string; + struct Node* value; + struct Node* next; +} Node; + +// Creates an initialized Node. +static Node* BA_CreateNode(NodeType type) { + Node* tv = (Node*)BA_Bump(sizeof(Node)); + assert(tv); + *tv = (Node){.type = type}; + return tv; +} + +// Adds an integer node. +static Node* CreateInt(int value) { + Node* tv = BA_CreateNode(NT_INT); + tv->integer = value; + return tv; +} + +// Adds a string node. +// `value` must outlive the tree. +static Node* CreateConstantString(const char* value) { + Node* tv = BA_CreateNode(NT_STRING); + tv->string = value; + return tv; +} + +// Adds a map node. +static Node* CreateMap() { return BA_CreateNode(NT_MAP); } + +// Adds an array node. +static Node* CreateArray() { return BA_CreateNode(NT_ARRAY); } + +// Adds a formatted string node. +static Node* CreatePrintfString(const char* format, ...) { + va_list arglist; + va_start(arglist, format); + char* const ptr = gBumpAllocator.ptr; + const int written = vsnprintf(ptr, gBumpAllocator.size, format, arglist); + va_end(arglist); + if (written < 0 || written >= (int)gBumpAllocator.size) internal_error(); + return CreateConstantString((char*)BA_Bump(written)); +} + +// Adds a string node. +static Node* CreateString(const char* value) { + return CreatePrintfString("%s", value); +} + +// Adds a map entry node. +static void AddMapEntry(Node* map, const char* key, Node* value) { + assert(map && map->type == NT_MAP); + Node* current = map; + while (current->next) current = current->next; + current->next = (Node*)BA_Bump(sizeof(Node)); + *current->next = (Node){.type = NT_MAP_ENTRY, .string = key, .value = value}; +} + +// Adds an array element node. +static void AddArrayElement(Node* array, Node* value) { + assert(array && array->type == NT_ARRAY); + Node* current = array; + while (current->next) current = current->next; + current->next = (Node*)BA_Bump(sizeof(Node)); + *current->next = (Node){.type = NT_ARRAY_ELEMENT, .value = value}; +} + +static int cmp(const void* p1, const void* p2) { + return strcmp(*(const char* const*)p1, *(const char* const*)p2); +} + +#define DEFINE_ADD_FLAGS(HasFeature, FeatureName, FeatureType, LastEnum) \ + static void AddFlags(Node* map, const FeatureType* features) { \ + size_t i; \ + const char* ptrs[LastEnum] = {0}; \ + size_t count = 0; \ + for (i = 0; i < LastEnum; ++i) { \ + if (HasFeature(features, i)) { \ + ptrs[count] = FeatureName(i); \ + ++count; \ + } \ + } \ + qsort((void*)ptrs, count, sizeof(char*), cmp); \ + Node* const array = CreateArray(); \ + for (i = 0; i < count; ++i) \ + AddArrayElement(array, CreateConstantString(ptrs[i])); \ + AddMapEntry(map, "flags", array); \ + } + +#if defined(CPU_FEATURES_ARCH_X86) +DEFINE_ADD_FLAGS(GetX86FeaturesEnumValue, GetX86FeaturesEnumName, X86Features, + X86_LAST_) +#elif defined(CPU_FEATURES_ARCH_ARM) +DEFINE_ADD_FLAGS(GetArmFeaturesEnumValue, GetArmFeaturesEnumName, ArmFeatures, + ARM_LAST_) +#elif defined(CPU_FEATURES_ARCH_AARCH64) +DEFINE_ADD_FLAGS(GetAarch64FeaturesEnumValue, GetAarch64FeaturesEnumName, + Aarch64Features, AARCH64_LAST_) +#elif defined(CPU_FEATURES_ARCH_MIPS) +DEFINE_ADD_FLAGS(GetMipsFeaturesEnumValue, GetMipsFeaturesEnumName, + MipsFeatures, MIPS_LAST_) +#elif defined(CPU_FEATURES_ARCH_PPC) +DEFINE_ADD_FLAGS(GetPPCFeaturesEnumValue, GetPPCFeaturesEnumName, PPCFeatures, + PPC_LAST_) +#endif + +// Prints a json string with characters escaping. +static void printJsonString(const char* str) { + putchar('"'); + for (; str && *str; ++str) { + switch (*str) { + case '\"': + case '\\': + case '/': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + putchar('\\'); + } + putchar(*str); + } + putchar('"'); +} + +// Walks a Node and print it as json. +static void printJson(const Node* current) { + assert(current); + switch (current->type) { + case NT_INVALID: + break; + case NT_INT: + printf("%d", current->integer); + break; + case NT_STRING: + printJsonString(current->string); + break; + case NT_ARRAY: + putchar('['); + if (current->next) printJson(current->next); + putchar(']'); + break; + case NT_MAP: + putchar('{'); + if (current->next) printJson(current->next); + putchar('}'); + break; + case NT_MAP_ENTRY: + printf("\"%s\":", current->string); + printJson(current->value); + if (current->next) { + putchar(','); + printJson(current->next); + } + break; + case NT_ARRAY_ELEMENT: + printJson(current->value); + if (current->next) { + putchar(','); + printJson(current->next); + } + break; + } +} + +// Walks a Node and print it as text. +static void printTextField(const Node* current) { + switch (current->type) { + case NT_INVALID: + break; + case NT_INT: + printf("%3d (0x%02X)", current->integer, current->integer); + break; + case NT_STRING: + fputs(current->string, stdout); + break; + case NT_ARRAY: + if (current->next) printTextField(current->next); + break; + case NT_MAP: + if (current->next) { + printf("{"); + printJson(current->next); + printf("}"); + } + break; + case NT_MAP_ENTRY: + printf("%-15s : ", current->string); + printTextField(current->value); + if (current->next) { + putchar('\n'); + printTextField(current->next); + } + break; + case NT_ARRAY_ELEMENT: + printTextField(current->value); + if (current->next) { + putchar(','); + printTextField(current->next); + } + break; + } +} + +static void printTextRoot(const Node* current) { + if (current->type == NT_MAP && current->next) printTextField(current->next); +} + +static void showUsage(const char* name) { + printf( + "\n" + "Usage: %s [options]\n" + " Options:\n" + " -h | --help Show help message.\n" + " -j | --json Format output as json instead of plain text.\n" + "\n", + name); +} + +static Node* GetCacheTypeString(CacheType cache_type) { + switch (cache_type) { + case CPU_FEATURE_CACHE_NULL: + return CreateConstantString("null"); + case CPU_FEATURE_CACHE_DATA: + return CreateConstantString("data"); + case CPU_FEATURE_CACHE_INSTRUCTION: + return CreateConstantString("instruction"); + case CPU_FEATURE_CACHE_UNIFIED: + return CreateConstantString("unified"); + case CPU_FEATURE_CACHE_TLB: + return CreateConstantString("tlb"); + case CPU_FEATURE_CACHE_DTLB: + return CreateConstantString("dtlb"); + case CPU_FEATURE_CACHE_STLB: + return CreateConstantString("stlb"); + case CPU_FEATURE_CACHE_PREFETCH: + return CreateConstantString("prefetch"); + } + UNREACHABLE(); +} + +static void AddCacheInfo(Node* root, const CacheInfo* cache_info) { + Node* array = CreateArray(); + for (int i = 0; i < cache_info->size; ++i) { + CacheLevelInfo info = cache_info->levels[i]; + Node* map = CreateMap(); + AddMapEntry(map, "level", CreateInt(info.level)); + AddMapEntry(map, "cache_type", GetCacheTypeString(info.cache_type)); + AddMapEntry(map, "cache_size", CreateInt(info.cache_size)); + AddMapEntry(map, "ways", CreateInt(info.ways)); + AddMapEntry(map, "line_size", CreateInt(info.line_size)); + AddMapEntry(map, "tlb_entries", CreateInt(info.tlb_entries)); + AddMapEntry(map, "partitioning", CreateInt(info.partitioning)); + AddArrayElement(array, map); + } + AddMapEntry(root, "cache_info", array); +} + +static Node* CreateTree() { + Node* root = CreateMap(); +#if defined(CPU_FEATURES_ARCH_X86) + char brand_string[49]; + const X86Info info = GetX86Info(); + const CacheInfo cache_info = GetX86CacheInfo(); + FillX86BrandString(brand_string); + AddMapEntry(root, "arch", CreateString("x86")); + AddMapEntry(root, "brand", CreateString(brand_string)); + AddMapEntry(root, "family", CreateInt(info.family)); + AddMapEntry(root, "model", CreateInt(info.model)); + AddMapEntry(root, "stepping", CreateInt(info.stepping)); + AddMapEntry(root, "uarch", + CreateString( + GetX86MicroarchitectureName(GetX86Microarchitecture(&info)))); + AddFlags(root, &info.features); + AddCacheInfo(root, &cache_info); +#elif defined(CPU_FEATURES_ARCH_ARM) + const ArmInfo info = GetArmInfo(); + AddMapEntry(root, "arch", CreateString("ARM")); + AddMapEntry(root, "implementer", CreateInt(info.implementer)); + AddMapEntry(root, "architecture", CreateInt(info.architecture)); + AddMapEntry(root, "variant", CreateInt(info.variant)); + AddMapEntry(root, "part", CreateInt(info.part)); + AddMapEntry(root, "revision", CreateInt(info.revision)); + AddFlags(root, &info.features); +#elif defined(CPU_FEATURES_ARCH_AARCH64) + const Aarch64Info info = GetAarch64Info(); + AddMapEntry(root, "arch", CreateString("aarch64")); + AddMapEntry(root, "implementer", CreateInt(info.implementer)); + AddMapEntry(root, "variant", CreateInt(info.variant)); + AddMapEntry(root, "part", CreateInt(info.part)); + AddMapEntry(root, "revision", CreateInt(info.revision)); + AddFlags(root, &info.features); +#elif defined(CPU_FEATURES_ARCH_MIPS) + const MipsInfo info = GetMipsInfo(); + AddMapEntry(root, "arch", CreateString("mips")); + AddFlags(root, &info.features); +#elif defined(CPU_FEATURES_ARCH_PPC) + const PPCInfo info = GetPPCInfo(); + const PPCPlatformStrings strings = GetPPCPlatformStrings(); + AddMapEntry(root, "arch", CreateString("ppc")); + AddMapEntry(root, "platform", CreateString(strings.platform)); + AddMapEntry(root, "model", CreateString(strings.model)); + AddMapEntry(root, "machine", CreateString(strings.machine)); + AddMapEntry(root, "cpu", CreateString(strings.cpu)); + AddMapEntry(root, "instruction", CreateString(strings.type.platform)); + AddMapEntry(root, "microarchitecture", + CreateString(strings.type.base_platform)); + AddFlags(root, &info.features); +#endif + return root; +} + +int main(int argc, char** argv) { + BA_Align(); + const Node* const root = CreateTree(); + bool outputJson = false; + int i = 1; + for (; i < argc; ++i) { + const char* arg = argv[i]; + if (strcmp(arg, "-j") == 0 || strcmp(arg, "--json") == 0) { + outputJson = true; + } else { + showUsage(argv[0]); + if (strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0) + return EXIT_SUCCESS; + return EXIT_FAILURE; + } + } + if (outputJson) + printJson(root); + else + printTextRoot(root); + putchar('\n'); + return EXIT_SUCCESS; +} diff --git a/cpu_features/test/CMakeLists.txt b/cpu_features/test/CMakeLists.txt new file mode 100644 index 0000000..8e8f72a --- /dev/null +++ b/cpu_features/test/CMakeLists.txt @@ -0,0 +1,91 @@ +# +# libraries for tests +# + +include_directories(../include) +add_definitions(-DCPU_FEATURES_TEST) + +##------------------------------------------------------------------------------ +add_library(string_view ../src/string_view.c) +##------------------------------------------------------------------------------ +add_library(filesystem_for_testing filesystem_for_testing.cc) +target_compile_definitions(filesystem_for_testing PUBLIC CPU_FEATURES_MOCK_FILESYSTEM) +##------------------------------------------------------------------------------ +add_library(hwcaps_for_testing hwcaps_for_testing.cc) +target_link_libraries(hwcaps_for_testing filesystem_for_testing) +##------------------------------------------------------------------------------ +add_library(stack_line_reader ../src/stack_line_reader.c) +target_compile_definitions(stack_line_reader PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024) +target_link_libraries(stack_line_reader string_view) +##------------------------------------------------------------------------------ +add_library(stack_line_reader_for_test ../src/stack_line_reader.c) +target_compile_definitions(stack_line_reader_for_test PUBLIC STACK_LINE_READER_BUFFER_SIZE=16) +target_link_libraries(stack_line_reader_for_test string_view filesystem_for_testing) +##------------------------------------------------------------------------------ +add_library(all_libraries ../src/hwcaps.c ../src/stack_line_reader.c) +target_link_libraries(all_libraries hwcaps_for_testing stack_line_reader string_view) + +# +# tests +# +link_libraries(gtest gmock_main) + +## bit_utils_test +add_executable(bit_utils_test bit_utils_test.cc) +target_link_libraries(bit_utils_test) +add_test(NAME bit_utils_test COMMAND bit_utils_test) +##------------------------------------------------------------------------------ +## string_view_test +add_executable(string_view_test string_view_test.cc ../src/string_view.c) +target_link_libraries(string_view_test string_view) +add_test(NAME string_view_test COMMAND string_view_test) +##------------------------------------------------------------------------------ +## stack_line_reader_test +add_executable(stack_line_reader_test stack_line_reader_test.cc) +target_link_libraries(stack_line_reader_test stack_line_reader_for_test) +add_test(NAME stack_line_reader_test COMMAND stack_line_reader_test) +##------------------------------------------------------------------------------ +## cpuinfo_x86_test +if(PROCESSOR_IS_X86) + add_executable(cpuinfo_x86_test + cpuinfo_x86_test.cc + ../src/impl_x86_freebsd.c + ../src/impl_x86_linux_or_android.c + ../src/impl_x86_macos.c + ../src/impl_x86_windows.c + ) + target_compile_definitions(cpuinfo_x86_test PUBLIC CPU_FEATURES_MOCK_CPUID_X86) + if(APPLE) + target_compile_definitions(cpuinfo_x86_test PRIVATE HAVE_SYSCTLBYNAME) + endif() + target_link_libraries(cpuinfo_x86_test all_libraries) + add_test(NAME cpuinfo_x86_test COMMAND cpuinfo_x86_test) +endif() +##------------------------------------------------------------------------------ +## cpuinfo_arm_test +if(PROCESSOR_IS_ARM) + add_executable(cpuinfo_arm_test cpuinfo_arm_test.cc ../src/impl_arm_linux_or_android.c) + target_link_libraries(cpuinfo_arm_test all_libraries) + add_test(NAME cpuinfo_arm_test COMMAND cpuinfo_arm_test) +endif() +##------------------------------------------------------------------------------ +## cpuinfo_aarch64_test +if(PROCESSOR_IS_AARCH64) + add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/impl_aarch64_linux_or_android.c) + target_link_libraries(cpuinfo_aarch64_test all_libraries) + add_test(NAME cpuinfo_aarch64_test COMMAND cpuinfo_aarch64_test) +endif() +##------------------------------------------------------------------------------ +## cpuinfo_mips_test +if(PROCESSOR_IS_MIPS) + add_executable(cpuinfo_mips_test cpuinfo_mips_test.cc ../src/impl_mips_linux_or_android.c) + target_link_libraries(cpuinfo_mips_test all_libraries) + add_test(NAME cpuinfo_mips_test COMMAND cpuinfo_mips_test) +endif() +##------------------------------------------------------------------------------ +## cpuinfo_ppc_test +if(PROCESSOR_IS_POWER) + add_executable(cpuinfo_ppc_test cpuinfo_ppc_test.cc ../src/impl_ppc_linux.c) + target_link_libraries(cpuinfo_ppc_test all_libraries) + add_test(NAME cpuinfo_ppc_test COMMAND cpuinfo_ppc_test) +endif() diff --git a/cpu_features/test/bit_utils_test.cc b/cpu_features/test/bit_utils_test.cc new file mode 100644 index 0000000..3874e13 --- /dev/null +++ b/cpu_features/test/bit_utils_test.cc @@ -0,0 +1,53 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/bit_utils.h" + +#include "gtest/gtest.h" + +namespace cpu_features { +namespace { + +TEST(UtilsTest, IsBitSet) { + for (size_t bit_set = 0; bit_set < 32; ++bit_set) { + const uint32_t value = 1UL << bit_set; + for (uint32_t i = 0; i < 32; ++i) { + EXPECT_EQ(IsBitSet(value, i), i == bit_set); + } + } + + // testing 0, all bits should be 0. + for (uint32_t i = 0; i < 32; ++i) { + EXPECT_FALSE(IsBitSet(0, i)); + } + + // testing ~0, all bits should be 1. + for (uint32_t i = 0; i < 32; ++i) { + EXPECT_TRUE(IsBitSet(-1, i)); + } +} + +TEST(UtilsTest, ExtractBitRange) { + // Extracting all bits gives the same number. + EXPECT_EQ(ExtractBitRange(123, 31, 0), 123); + // Extracting 1 bit gives parity. + EXPECT_EQ(ExtractBitRange(123, 0, 0), 1); + EXPECT_EQ(ExtractBitRange(122, 0, 0), 0); + + EXPECT_EQ(ExtractBitRange(0xF0, 7, 4), 0xF); + EXPECT_EQ(ExtractBitRange(0x42 << 2, 10, 2), 0x42); +} + +} // namespace +} // namespace cpu_features diff --git a/cpu_features/test/cpuinfo_aarch64_test.cc b/cpu_features/test/cpuinfo_aarch64_test.cc new file mode 100644 index 0000000..04b6143 --- /dev/null +++ b/cpu_features/test/cpuinfo_aarch64_test.cc @@ -0,0 +1,174 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_aarch64.h" + +#include "filesystem_for_testing.h" +#include "gtest/gtest.h" +#include "hwcaps_for_testing.h" + +namespace cpu_features { +namespace { + +void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); } + +TEST(CpuinfoAarch64Test, FromHardwareCap) { + ResetHwcaps(); + SetHardwareCapabilities(AARCH64_HWCAP_FP | AARCH64_HWCAP_AES, 0); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetAarch64Info(); + EXPECT_TRUE(info.features.fp); + EXPECT_FALSE(info.features.asimd); + EXPECT_FALSE(info.features.evtstrm); + EXPECT_TRUE(info.features.aes); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + EXPECT_FALSE(info.features.sha2); + EXPECT_FALSE(info.features.crc32); + EXPECT_FALSE(info.features.atomics); + EXPECT_FALSE(info.features.fphp); + EXPECT_FALSE(info.features.asimdhp); + EXPECT_FALSE(info.features.cpuid); + EXPECT_FALSE(info.features.asimdrdm); + EXPECT_FALSE(info.features.jscvt); + EXPECT_FALSE(info.features.fcma); + EXPECT_FALSE(info.features.lrcpc); + EXPECT_FALSE(info.features.dcpop); + EXPECT_FALSE(info.features.sha3); + EXPECT_FALSE(info.features.sm3); + EXPECT_FALSE(info.features.sm4); + EXPECT_FALSE(info.features.asimddp); + EXPECT_FALSE(info.features.sha512); + EXPECT_FALSE(info.features.sve); + EXPECT_FALSE(info.features.asimdfhm); + EXPECT_FALSE(info.features.dit); + EXPECT_FALSE(info.features.uscat); + EXPECT_FALSE(info.features.ilrcpc); + EXPECT_FALSE(info.features.flagm); + EXPECT_FALSE(info.features.ssbs); + EXPECT_FALSE(info.features.sb); + EXPECT_FALSE(info.features.paca); + EXPECT_FALSE(info.features.pacg); +} + +TEST(CpuinfoAarch64Test, FromHardwareCap2) { + ResetHwcaps(); + SetHardwareCapabilities(AARCH64_HWCAP_FP, + AARCH64_HWCAP2_SVE2 | AARCH64_HWCAP2_BTI); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetAarch64Info(); + EXPECT_TRUE(info.features.fp); + + EXPECT_TRUE(info.features.sve2); + EXPECT_TRUE(info.features.bti); + + EXPECT_FALSE(info.features.dcpodp); + EXPECT_FALSE(info.features.sveaes); + EXPECT_FALSE(info.features.svepmull); + EXPECT_FALSE(info.features.svebitperm); + EXPECT_FALSE(info.features.svesha3); + EXPECT_FALSE(info.features.svesm4); + EXPECT_FALSE(info.features.flagm2); + EXPECT_FALSE(info.features.frint); + EXPECT_FALSE(info.features.svei8mm); + EXPECT_FALSE(info.features.svef32mm); + EXPECT_FALSE(info.features.svef64mm); + EXPECT_FALSE(info.features.svebf16); + EXPECT_FALSE(info.features.i8mm); + EXPECT_FALSE(info.features.bf16); + EXPECT_FALSE(info.features.dgh); + EXPECT_FALSE(info.features.rng); +} + +TEST(CpuinfoAarch64Test, ARMCortexA53) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(Processor : AArch64 Processor rev 3 (aarch64) +processor : 0 +processor : 1 +processor : 2 +processor : 3 +processor : 4 +processor : 5 +processor : 6 +processor : 7 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 +CPU implementer : 0x41 +CPU architecture: AArch64 +CPU variant : 0x0 +CPU part : 0xd03 +CPU revision : 3)"); + const auto info = GetAarch64Info(); + EXPECT_EQ(info.implementer, 0x41); + EXPECT_EQ(info.variant, 0x0); + EXPECT_EQ(info.part, 0xd03); + EXPECT_EQ(info.revision, 3); + + EXPECT_TRUE(info.features.fp); + EXPECT_TRUE(info.features.asimd); + EXPECT_TRUE(info.features.evtstrm); + EXPECT_TRUE(info.features.aes); + EXPECT_TRUE(info.features.pmull); + EXPECT_TRUE(info.features.sha1); + EXPECT_TRUE(info.features.sha2); + EXPECT_TRUE(info.features.crc32); + + EXPECT_FALSE(info.features.atomics); + EXPECT_FALSE(info.features.fphp); + EXPECT_FALSE(info.features.asimdhp); + EXPECT_FALSE(info.features.cpuid); + EXPECT_FALSE(info.features.asimdrdm); + EXPECT_FALSE(info.features.jscvt); + EXPECT_FALSE(info.features.fcma); + EXPECT_FALSE(info.features.lrcpc); + EXPECT_FALSE(info.features.dcpop); + EXPECT_FALSE(info.features.sha3); + EXPECT_FALSE(info.features.sm3); + EXPECT_FALSE(info.features.sm4); + EXPECT_FALSE(info.features.asimddp); + EXPECT_FALSE(info.features.sha512); + EXPECT_FALSE(info.features.sve); + EXPECT_FALSE(info.features.asimdfhm); + EXPECT_FALSE(info.features.dit); + EXPECT_FALSE(info.features.uscat); + EXPECT_FALSE(info.features.ilrcpc); + EXPECT_FALSE(info.features.flagm); + EXPECT_FALSE(info.features.ssbs); + EXPECT_FALSE(info.features.sb); + EXPECT_FALSE(info.features.paca); + EXPECT_FALSE(info.features.pacg); + EXPECT_FALSE(info.features.dcpodp); + EXPECT_FALSE(info.features.sve2); + EXPECT_FALSE(info.features.sveaes); + EXPECT_FALSE(info.features.svepmull); + EXPECT_FALSE(info.features.svebitperm); + EXPECT_FALSE(info.features.svesha3); + EXPECT_FALSE(info.features.svesm4); + EXPECT_FALSE(info.features.flagm2); + EXPECT_FALSE(info.features.frint); + EXPECT_FALSE(info.features.svei8mm); + EXPECT_FALSE(info.features.svef32mm); + EXPECT_FALSE(info.features.svef64mm); + EXPECT_FALSE(info.features.svebf16); + EXPECT_FALSE(info.features.i8mm); + EXPECT_FALSE(info.features.bf16); + EXPECT_FALSE(info.features.dgh); + EXPECT_FALSE(info.features.rng); + EXPECT_FALSE(info.features.bti); + EXPECT_FALSE(info.features.mte); +} + +} // namespace +} // namespace cpu_features diff --git a/cpu_features/test/cpuinfo_arm_test.cc b/cpu_features/test/cpuinfo_arm_test.cc new file mode 100644 index 0000000..85ee7fb --- /dev/null +++ b/cpu_features/test/cpuinfo_arm_test.cc @@ -0,0 +1,354 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_arm.h" + +#include "filesystem_for_testing.h" +#include "gtest/gtest.h" +#include "hwcaps_for_testing.h" + +namespace cpu_features { +namespace { + +TEST(CpuinfoArmTest, FromHardwareCap) { + ResetHwcaps(); + SetHardwareCapabilities(ARM_HWCAP_NEON, ARM_HWCAP2_AES | ARM_HWCAP2_CRC32); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.vfp); // triggered by vfpv3 + EXPECT_TRUE(info.features.vfpv3); // triggered by neon + EXPECT_TRUE(info.features.neon); + EXPECT_TRUE(info.features.aes); + EXPECT_TRUE(info.features.crc32); + + EXPECT_FALSE(info.features.vfpv4); + EXPECT_FALSE(info.features.iwmmxt); + EXPECT_FALSE(info.features.crunch); + EXPECT_FALSE(info.features.thumbee); + EXPECT_FALSE(info.features.vfpv3d16); + EXPECT_FALSE(info.features.idiva); + EXPECT_FALSE(info.features.idivt); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + EXPECT_FALSE(info.features.sha2); + + // check some random features with EnumValue(): + EXPECT_TRUE(GetArmFeaturesEnumValue(&info.features, ARM_VFP)); + EXPECT_FALSE(GetArmFeaturesEnumValue(&info.features, ARM_VFPV4)); + // out of bound EnumValue() check + EXPECT_FALSE(GetArmFeaturesEnumValue(&info.features, (ArmFeaturesEnum)~0x0)); +} + +TEST(CpuinfoArmTest, ODroidFromCpuInfo) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(processor : 0 +model name : ARMv7 Processor rev 3 (v71) +BogoMIPS : 120.00 +Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x2 +CPU part : 0xc0f +CPU revision : 3)"); + const auto info = GetArmInfo(); + EXPECT_EQ(info.implementer, 0x41); + EXPECT_EQ(info.variant, 0x2); + EXPECT_EQ(info.part, 0xc0f); + EXPECT_EQ(info.revision, 3); + EXPECT_EQ(info.architecture, 7); + + EXPECT_FALSE(info.features.swp); + EXPECT_TRUE(info.features.half); + EXPECT_TRUE(info.features.thumb); + EXPECT_FALSE(info.features._26bit); + EXPECT_TRUE(info.features.fastmult); + EXPECT_FALSE(info.features.fpa); + EXPECT_TRUE(info.features.vfp); + EXPECT_TRUE(info.features.edsp); + EXPECT_FALSE(info.features.java); + EXPECT_FALSE(info.features.iwmmxt); + EXPECT_FALSE(info.features.crunch); + EXPECT_FALSE(info.features.thumbee); + EXPECT_TRUE(info.features.neon); + EXPECT_TRUE(info.features.vfpv3); + EXPECT_FALSE(info.features.vfpv3d16); + EXPECT_TRUE(info.features.tls); + EXPECT_TRUE(info.features.vfpv4); + EXPECT_TRUE(info.features.idiva); + EXPECT_TRUE(info.features.idivt); + EXPECT_TRUE(info.features.vfpd32); + EXPECT_TRUE(info.features.lpae); + EXPECT_FALSE(info.features.evtstrm); + EXPECT_FALSE(info.features.aes); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + EXPECT_FALSE(info.features.sha2); + EXPECT_FALSE(info.features.crc32); +} + +// Linux test-case +TEST(CpuinfoArmTest, RaspberryPiZeroFromCpuInfo) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(processor : 0 +model name : ARMv6-compatible processor rev 7 (v6l) +BogoMIPS : 697.95 +Features : half thumb fastmult vfp edsp java tls +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0xb76 +CPU revision : 7 + +Hardware : BCM2835 +Revision : 9000c1 +Serial : 000000006cd946f3)"); + const auto info = GetArmInfo(); + EXPECT_EQ(info.implementer, 0x41); + EXPECT_EQ(info.variant, 0x0); + EXPECT_EQ(info.part, 0xb76); + EXPECT_EQ(info.revision, 7); + EXPECT_EQ(info.architecture, 6); + + EXPECT_FALSE(info.features.swp); + EXPECT_TRUE(info.features.half); + EXPECT_TRUE(info.features.thumb); + EXPECT_FALSE(info.features._26bit); + EXPECT_TRUE(info.features.fastmult); + EXPECT_FALSE(info.features.fpa); + EXPECT_TRUE(info.features.vfp); + EXPECT_TRUE(info.features.edsp); + EXPECT_TRUE(info.features.java); + EXPECT_FALSE(info.features.iwmmxt); + EXPECT_FALSE(info.features.crunch); + EXPECT_FALSE(info.features.thumbee); + EXPECT_FALSE(info.features.neon); + EXPECT_FALSE(info.features.vfpv3); + EXPECT_FALSE(info.features.vfpv3d16); + EXPECT_TRUE(info.features.tls); + EXPECT_FALSE(info.features.vfpv4); + EXPECT_FALSE(info.features.idiva); + EXPECT_FALSE(info.features.idivt); + EXPECT_FALSE(info.features.vfpd32); + EXPECT_FALSE(info.features.lpae); + EXPECT_FALSE(info.features.evtstrm); + EXPECT_FALSE(info.features.aes); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + EXPECT_FALSE(info.features.sha2); + EXPECT_FALSE(info.features.crc32); +} + +TEST(CpuinfoArmTest, MarvellArmadaFromCpuInfo) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(processor : 0 +model name : ARMv7 Processor rev 1 (v7l) +BogoMIPS : 50.00 +Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpd32 +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x4 +CPU part : 0xc09 +CPU revision : 1 + +processor : 1 +model name : ARMv7 Processor rev 1 (v7l) +BogoMIPS : 50.00 +Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpd32 +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x4 +CPU part : 0xc09 +CPU revision : 1 + +Hardware : Marvell Armada 380/385 (Device Tree) +Revision : 0000 +Serial : 0000000000000000)"); + const auto info = GetArmInfo(); + EXPECT_EQ(info.implementer, 0x41); + EXPECT_EQ(info.variant, 0x4); + EXPECT_EQ(info.part, 0xc09); + EXPECT_EQ(info.revision, 1); + EXPECT_EQ(info.architecture, 7); + + EXPECT_FALSE(info.features.swp); + EXPECT_TRUE(info.features.half); + EXPECT_TRUE(info.features.thumb); + EXPECT_FALSE(info.features._26bit); + EXPECT_TRUE(info.features.fastmult); + EXPECT_FALSE(info.features.fpa); + EXPECT_TRUE(info.features.vfp); + EXPECT_TRUE(info.features.edsp); + EXPECT_FALSE(info.features.java); + EXPECT_FALSE(info.features.iwmmxt); + EXPECT_FALSE(info.features.crunch); + EXPECT_FALSE(info.features.thumbee); + EXPECT_TRUE(info.features.neon); + EXPECT_TRUE(info.features.vfpv3); + EXPECT_FALSE(info.features.vfpv3d16); + EXPECT_TRUE(info.features.tls); + EXPECT_FALSE(info.features.vfpv4); + EXPECT_FALSE(info.features.idiva); + EXPECT_FALSE(info.features.idivt); + EXPECT_TRUE(info.features.vfpd32); + EXPECT_FALSE(info.features.lpae); + EXPECT_FALSE(info.features.evtstrm); + EXPECT_FALSE(info.features.aes); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + EXPECT_FALSE(info.features.sha2); + EXPECT_FALSE(info.features.crc32); +} + +// Android test-case +// http://code.google.com/p/android/issues/detail?id=10812 +TEST(CpuinfoArmTest, InvalidArmv7) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(Processor : ARMv6-compatible processor rev 6 (v6l) +BogoMIPS : 199.47 +Features : swp half thumb fastmult vfp edsp java +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0xb76 +CPU revision : 6 + +Hardware : SPICA +Revision : 0020 +Serial : 33323613546d00ec )"); + const auto info = GetArmInfo(); + EXPECT_EQ(info.architecture, 6); + + EXPECT_TRUE(info.features.swp); + EXPECT_TRUE(info.features.half); + EXPECT_TRUE(info.features.thumb); + EXPECT_FALSE(info.features._26bit); + EXPECT_TRUE(info.features.fastmult); + EXPECT_FALSE(info.features.fpa); + EXPECT_TRUE(info.features.vfp); + EXPECT_TRUE(info.features.edsp); + EXPECT_TRUE(info.features.java); + EXPECT_FALSE(info.features.iwmmxt); + EXPECT_FALSE(info.features.crunch); + EXPECT_FALSE(info.features.thumbee); + EXPECT_FALSE(info.features.neon); + EXPECT_FALSE(info.features.vfpv3); + EXPECT_FALSE(info.features.vfpv3d16); + EXPECT_FALSE(info.features.tls); + EXPECT_FALSE(info.features.vfpv4); + EXPECT_FALSE(info.features.idiva); + EXPECT_FALSE(info.features.idivt); + EXPECT_FALSE(info.features.vfpd32); + EXPECT_FALSE(info.features.lpae); + EXPECT_FALSE(info.features.evtstrm); + EXPECT_FALSE(info.features.aes); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + EXPECT_FALSE(info.features.sha2); + EXPECT_FALSE(info.features.crc32); +} + +// Android test-case +// https://crbug.com/341598. +TEST(CpuinfoArmTest, InvalidNeon) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(Processor: ARMv7 Processory rev 0 (v71) +processor: 0 +BogoMIPS: 13.50 + +Processor: 1 +BogoMIPS: 13.50 + +Features: swp half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt +CPU implementer : 0x51 +CPU architecture: 7 +CPU variant: 0x1 +CPU part: 0x04d +CPU revision: 0 + +Hardware: SAMSUNG M2 +Revision: 0010 +Serial: 00001e030000354e)"); + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.swp); + EXPECT_FALSE(info.features.neon); +} + +// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV +// support. +TEST(CpuinfoArmTest, Nexus4_0x510006f2) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(CPU implementer : 0x51 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0x6f +CPU revision : 2)"); + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.idiva); + EXPECT_TRUE(info.features.idivt); + + EXPECT_EQ(GetArmCpuId(&info), 0x510006f2); +} + +// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV +// support. +TEST(CpuinfoArmTest, Nexus4_0x510006f3) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(CPU implementer : 0x51 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0x6f +CPU revision : 3)"); + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.idiva); + EXPECT_TRUE(info.features.idivt); + + EXPECT_EQ(GetArmCpuId(&info), 0x510006f3); +} + +// The emulator-specific Android 4.2 kernel fails to report support for the +// 32-bit ARM IDIV instruction. Technically, this is a feature of the virtual +// CPU implemented by the emulator. +TEST(CpuinfoArmTest, EmulatorSpecificIdiv) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(Processor : ARMv7 Processor rev 0 (v7l) +BogoMIPS : 629.14 +Features : swp half thumb fastmult vfp edsp neon vfpv3 +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0xc08 +CPU revision : 0 + +Hardware : Goldfish +Revision : 0000 +Serial : 0000000000000000)"); + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.idiva); +} + +} // namespace +} // namespace cpu_features diff --git a/cpu_features/test/cpuinfo_mips_test.cc b/cpu_features/test/cpuinfo_mips_test.cc new file mode 100644 index 0000000..a01624a --- /dev/null +++ b/cpu_features/test/cpuinfo_mips_test.cc @@ -0,0 +1,126 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_mips.h" + +#include "filesystem_for_testing.h" +#include "gtest/gtest.h" +#include "hwcaps_for_testing.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +namespace cpu_features { + +namespace { + +TEST(CpuinfoMipsTest, FromHardwareCapBoth) { + ResetHwcaps(); + SetHardwareCapabilities(MIPS_HWCAP_MSA | MIPS_HWCAP_R6, 0); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetMipsInfo(); + EXPECT_TRUE(info.features.msa); + EXPECT_FALSE(info.features.eva); + EXPECT_TRUE(info.features.r6); +} + +TEST(CpuinfoMipsTest, FromHardwareCapOnlyOne) { + ResetHwcaps(); + SetHardwareCapabilities(MIPS_HWCAP_MSA, 0); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetMipsInfo(); + EXPECT_TRUE(info.features.msa); + EXPECT_FALSE(info.features.eva); +} + +TEST(CpuinfoMipsTest, Ci40) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(system type : IMG Pistachio SoC (B0) +machine : IMG Marduk – Ci40 with cc2520 +processor : 0 +cpu model : MIPS interAptiv (multi) V2.0 FPU V0.0 +BogoMIPS : 363.72 +wait instruction : yes +microsecond timers : yes +tlb_entries : 64 +extra interrupt vector : yes +hardware watchpoint : yes, count: 4, address/irw mask: [0x0ffc, 0x0ffc, 0x0ffb, 0x0ffb] +isa : mips1 mips2 mips32r1 mips32r2 +ASEs implemented : mips16 dsp mt eva +shadow register sets : 1 +kscratch registers : 0 +package : 0 +core : 0 +VCED exceptions : not available +VCEI exceptions : not available +VPE : 0 +)"); + const auto info = GetMipsInfo(); + EXPECT_FALSE(info.features.msa); + EXPECT_TRUE(info.features.eva); +} + +TEST(CpuinfoMipsTest, AR7161) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(system type : Atheros AR7161 rev 2 +machine : NETGEAR WNDR3700/WNDR3800/WNDRMAC +processor : 0 +cpu model : MIPS 24Kc V7.4 +BogoMIPS : 452.19 +wait instruction : yes +microsecond timers : yes +tlb_entries : 16 +extra interrupt vector : yes +hardware watchpoint : yes, count: 4, address/irw mask: [0x0000, 0x0f98, 0x0f78, 0x0df8] +ASEs implemented : mips16 +shadow register sets : 1 +kscratch registers : 0 +core : 0 +VCED exceptions : not available +VCEI exceptions : not available +)"); + const auto info = GetMipsInfo(); + EXPECT_FALSE(info.features.msa); + EXPECT_FALSE(info.features.eva); +} + +TEST(CpuinfoMipsTest, Goldfish) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(system type : MIPS-Goldfish +Hardware : goldfish +Revison : 1 +processor : 0 +cpu model : MIPS 24Kc V0.0 FPU V0.0 +BogoMIPS : 1042.02 +wait instruction : yes +microsecond timers : yes +tlb_entries : 16 +extra interrupt vector : yes +hardware watchpoint : yes, count: 1, address/irw mask: [0x0ff8] +ASEs implemented : +shadow register sets : 1 +core : 0 +VCED exceptions : not available +VCEI exceptions : not available +)"); + const auto info = GetMipsInfo(); + EXPECT_FALSE(info.features.msa); + EXPECT_FALSE(info.features.eva); +} + +} // namespace +} // namespace cpu_features diff --git a/cpu_features/test/cpuinfo_ppc_test.cc b/cpu_features/test/cpuinfo_ppc_test.cc new file mode 100644 index 0000000..b43a7c8 --- /dev/null +++ b/cpu_features/test/cpuinfo_ppc_test.cc @@ -0,0 +1,119 @@ +// Copyright 2018 IBM. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_ppc.h" + +#include "filesystem_for_testing.h" +#include "gtest/gtest.h" +#include "hwcaps_for_testing.h" +#include "internal/string_view.h" + +namespace cpu_features { +namespace { + +TEST(CpustringsPPCTest, FromHardwareCap) { + ResetHwcaps(); + SetHardwareCapabilities(PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_VSX, + PPC_FEATURE2_ARCH_3_00); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetPPCInfo(); + EXPECT_TRUE(info.features.fpu); + EXPECT_FALSE(info.features.mmu); + EXPECT_TRUE(info.features.vsx); + EXPECT_TRUE(info.features.arch300); + EXPECT_FALSE(info.features.power4); + EXPECT_FALSE(info.features.altivec); + EXPECT_FALSE(info.features.vcrypto); + EXPECT_FALSE(info.features.htm); +} + +TEST(CpustringsPPCTest, Blade) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(processor : 14 +cpu : POWER7 (architected), altivec supported +clock : 3000.000000MHz +revision : 2.1 (pvr 003f 0201) + +processor : 15 +cpu : POWER7 (architected), altivec supported +clock : 3000.000000MHz +revision : 2.1 (pvr 003f 0201) + +timebase : 512000000 +platform : pSeries +model : IBM,8406-70Y +machine : CHRP IBM,8406-70Y)"); + SetPlatformPointer("power7"); + SetBasePlatformPointer("power8"); + const auto strings = GetPPCPlatformStrings(); + ASSERT_STREQ(strings.platform, "pSeries"); + ASSERT_STREQ(strings.model, "IBM,8406-70Y"); + ASSERT_STREQ(strings.machine, "CHRP IBM,8406-70Y"); + ASSERT_STREQ(strings.cpu, "POWER7 (architected), altivec supported"); + ASSERT_STREQ(strings.type.platform, "power7"); + ASSERT_STREQ(strings.type.base_platform, "power8"); +} + +TEST(CpustringsPPCTest, Firestone) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(processor : 126 +cpu : POWER8 (raw), altivec supported +clock : 2061.000000MHz +revision : 2.0 (pvr 004d 0200) + +processor : 127 +cpu : POWER8 (raw), altivec supported +clock : 2061.000000MHz +revision : 2.0 (pvr 004d 0200) + +timebase : 512000000 +platform : PowerNV +model : 8335-GTA +machine : PowerNV 8335-GTA +firmware : OPAL v3)"); + const auto strings = GetPPCPlatformStrings(); + ASSERT_STREQ(strings.platform, "PowerNV"); + ASSERT_STREQ(strings.model, "8335-GTA"); + ASSERT_STREQ(strings.machine, "PowerNV 8335-GTA"); + ASSERT_STREQ(strings.cpu, "POWER8 (raw), altivec supported"); +} + +TEST(CpustringsPPCTest, w8) { + ResetHwcaps(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(processor : 143 +cpu : POWER9, altivec supported +clock : 2300.000000MHz +revision : 2.2 (pvr 004e 1202) + +timebase : 512000000 +platform : PowerNV +model : 0000000000000000 +machine : PowerNV 0000000000000000 +firmware : OPAL +MMU : Radix)"); + const auto strings = GetPPCPlatformStrings(); + ASSERT_STREQ(strings.platform, "PowerNV"); + ASSERT_STREQ(strings.model, "0000000000000000"); + ASSERT_STREQ(strings.machine, "PowerNV 0000000000000000"); + ASSERT_STREQ(strings.cpu, "POWER9, altivec supported"); +} + +} // namespace +} // namespace cpu_features diff --git a/cpu_features/test/cpuinfo_x86_test.cc b/cpu_features/test/cpuinfo_x86_test.cc new file mode 100644 index 0000000..889190e --- /dev/null +++ b/cpu_features/test/cpuinfo_x86_test.cc @@ -0,0 +1,1099 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_x86.h" + +#include +#include +#include +#include +#if defined(CPU_FEATURES_OS_WINDOWS) +#include // IsProcessorFeaturePresent +#endif // CPU_FEATURES_OS_WINDOWS + +#include "filesystem_for_testing.h" +#include "gtest/gtest.h" +#include "internal/cpuid_x86.h" + +namespace cpu_features { + +class FakeCpu { + public: + Leaf GetCpuidLeaf(uint32_t leaf_id, int ecx) const { + const auto itr = cpuid_leaves_.find(std::make_pair(leaf_id, ecx)); + if (itr != cpuid_leaves_.end()) { + return itr->second; + } + return {0, 0, 0, 0}; + } + + uint32_t GetXCR0Eax() const { return xcr0_eax_; } + + void SetLeaves(std::map, Leaf> configuration) { + cpuid_leaves_ = std::move(configuration); + } + + void SetOsBackupsExtendedRegisters(bool os_backups_extended_registers) { + xcr0_eax_ = os_backups_extended_registers ? -1 : 0; + } + +#if defined(CPU_FEATURES_OS_MACOS) + bool GetDarwinSysCtlByName(std::string name) const { + return darwin_sysctlbyname_.count(name); + } + + void SetDarwinSysCtlByName(std::string name) { + darwin_sysctlbyname_.insert(name); + } +#endif // CPU_FEATURES_OS_MACOS + +#if defined(CPU_FEATURES_OS_WINDOWS) + bool GetWindowsIsProcessorFeaturePresent(DWORD ProcessorFeature) { + return windows_isprocessorfeaturepresent_.count(ProcessorFeature); + } + + void SetWindowsIsProcessorFeaturePresent(DWORD ProcessorFeature) { + windows_isprocessorfeaturepresent_.insert(ProcessorFeature); + } +#endif // CPU_FEATURES_OS_WINDOWS + + private: + std::map, Leaf> cpuid_leaves_; +#if defined(CPU_FEATURES_OS_MACOS) + std::set darwin_sysctlbyname_; +#endif // CPU_FEATURES_OS_MACOS +#if defined(CPU_FEATURES_OS_WINDOWS) + std::set windows_isprocessorfeaturepresent_; +#endif // CPU_FEATURES_OS_WINDOWS + uint32_t xcr0_eax_; +}; + +static FakeCpu* g_fake_cpu_instance = nullptr; + +static FakeCpu& cpu() { + assert(g_fake_cpu_instance != nullptr); + return *g_fake_cpu_instance; +} + +extern "C" Leaf GetCpuidLeaf(uint32_t leaf_id, int ecx) { + return cpu().GetCpuidLeaf(leaf_id, ecx); +} + +extern "C" uint32_t GetXCR0Eax(void) { return cpu().GetXCR0Eax(); } + +#if defined(CPU_FEATURES_OS_MACOS) +extern "C" bool GetDarwinSysCtlByName(const char* name) { + return cpu().GetDarwinSysCtlByName(name); +} +#endif // CPU_FEATURES_OS_MACOS + +#if defined(CPU_FEATURES_OS_WINDOWS) +extern "C" bool GetWindowsIsProcessorFeaturePresent(DWORD ProcessorFeature) { + return cpu().GetWindowsIsProcessorFeaturePresent(ProcessorFeature); +} +#endif // CPU_FEATURES_OS_WINDOWS + +namespace { + +class CpuidX86Test : public ::testing::Test { + protected: + void SetUp() override { + assert(g_fake_cpu_instance == nullptr); + g_fake_cpu_instance = new FakeCpu(); + } + void TearDown() override { + delete g_fake_cpu_instance; + g_fake_cpu_instance = nullptr; + } +}; + +TEST_F(CpuidX86Test, SandyBridge) { + cpu().SetOsBackupsExtendedRegisters(true); + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + }); + const auto info = GetX86Info(); + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x02A); + EXPECT_EQ(info.stepping, 0x06); + // Leaf 7 is zeroed out so none of the Leaf 7 flags are set. + const auto features = info.features; + EXPECT_FALSE(features.erms); + EXPECT_FALSE(features.avx2); + EXPECT_FALSE(features.avx512f); + EXPECT_FALSE(features.avx512cd); + EXPECT_FALSE(features.avx512er); + EXPECT_FALSE(features.avx512pf); + EXPECT_FALSE(features.avx512bw); + EXPECT_FALSE(features.avx512dq); + EXPECT_FALSE(features.avx512vl); + EXPECT_FALSE(features.avx512ifma); + EXPECT_FALSE(features.avx512vbmi); + EXPECT_FALSE(features.avx512vbmi2); + EXPECT_FALSE(features.avx512vnni); + EXPECT_FALSE(features.avx512bitalg); + EXPECT_FALSE(features.avx512vpopcntdq); + EXPECT_FALSE(features.avx512_4vnniw); + EXPECT_FALSE(features.avx512_4fmaps); + // All old cpu features should be set. + EXPECT_TRUE(features.aes); + EXPECT_TRUE(features.ssse3); + EXPECT_TRUE(features.sse4_1); + EXPECT_TRUE(features.sse4_2); + EXPECT_TRUE(features.avx); + EXPECT_FALSE(features.sha); + EXPECT_TRUE(features.popcnt); + EXPECT_FALSE(features.movbe); + EXPECT_FALSE(features.rdrnd); + EXPECT_FALSE(features.adx); +} + +const int UNDEF = -1; +const int KiB = 1024; +const int MiB = 1024 * KiB; + +TEST_F(CpuidX86Test, SandyBridgeTestOsSupport) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + }); + // avx is disabled if os does not support backing up ymm registers. + cpu().SetOsBackupsExtendedRegisters(false); + EXPECT_FALSE(GetX86Info().features.avx); + // avx is disabled if os does not support backing up ymm registers. + cpu().SetOsBackupsExtendedRegisters(true); + EXPECT_TRUE(GetX86Info().features.avx); +} + +TEST_F(CpuidX86Test, SkyLake) { + cpu().SetOsBackupsExtendedRegisters(true); + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}}, + }); + const auto info = GetX86Info(); + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x04E); + EXPECT_EQ(info.stepping, 0x03); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::INTEL_SKL); +} + +TEST_F(CpuidX86Test, Branding) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x80000008, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000001, 0}, Leaf{0x00000000, 0x00000000, 0x00000121, 0x2C100000}}, + {{0x80000002, 0}, Leaf{0x65746E49, 0x2952286C, 0x726F4320, 0x4D542865}}, + {{0x80000003, 0}, Leaf{0x37692029, 0x3035362D, 0x43205530, 0x40205550}}, + {{0x80000004, 0}, Leaf{0x352E3220, 0x7A484730, 0x00000000, 0x00000000}}, + }); + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "Intel(R) Core(TM) i7-6500U CPU @ 2.50GHz"); +} + +TEST_F(CpuidX86Test, KabyLakeCache) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}}, + {{0x00000004, 0}, Leaf{0x1C004121, 0x01C0003F, 0x0000003F, 0x00000000}}, + {{0x00000004, 1}, Leaf{0x1C004122, 0x01C0003F, 0x0000003F, 0x00000000}}, + {{0x00000004, 2}, Leaf{0x1C004143, 0x00C0003F, 0x000003FF, 0x00000000}}, + {{0x00000004, 3}, Leaf{0x1C03C163, 0x02C0003F, 0x00001FFF, 0x00000002}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x80000008, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000001, 0}, Leaf{0x00000000, 0x00000000, 0x00000121, 0x2C100000}}, + {{0x80000002, 0}, Leaf{0x65746E49, 0x2952286C, 0x726F4320, 0x4D542865}}, + {{0x80000003, 0}, Leaf{0x37692029, 0x3035362D, 0x43205530, 0x40205550}}, + }); + const auto info = GetX86CacheInfo(); + EXPECT_EQ(info.size, 4); + EXPECT_EQ(info.levels[0].level, 1); + EXPECT_EQ(info.levels[0].cache_type, 1); + EXPECT_EQ(info.levels[0].cache_size, 32 * KiB); + EXPECT_EQ(info.levels[0].ways, 8); + EXPECT_EQ(info.levels[0].line_size, 64); + EXPECT_EQ(info.levels[0].tlb_entries, 64); + EXPECT_EQ(info.levels[0].partitioning, 1); + + EXPECT_EQ(info.levels[1].level, 1); + EXPECT_EQ(info.levels[1].cache_type, 2); + EXPECT_EQ(info.levels[1].cache_size, 32 * KiB); + EXPECT_EQ(info.levels[1].ways, 8); + EXPECT_EQ(info.levels[1].line_size, 64); + EXPECT_EQ(info.levels[1].tlb_entries, 64); + EXPECT_EQ(info.levels[1].partitioning, 1); + + EXPECT_EQ(info.levels[2].level, 2); + EXPECT_EQ(info.levels[2].cache_type, 3); + EXPECT_EQ(info.levels[2].cache_size, 256 * KiB); + EXPECT_EQ(info.levels[2].ways, 4); + EXPECT_EQ(info.levels[2].line_size, 64); + EXPECT_EQ(info.levels[2].tlb_entries, 1024); + EXPECT_EQ(info.levels[2].partitioning, 1); + + EXPECT_EQ(info.levels[3].level, 3); + EXPECT_EQ(info.levels[3].cache_type, 3); + EXPECT_EQ(info.levels[3].cache_size, 6 * MiB); + EXPECT_EQ(info.levels[3].ways, 12); + EXPECT_EQ(info.levels[3].line_size, 64); + EXPECT_EQ(info.levels[3].tlb_entries, 8192); + EXPECT_EQ(info.levels[3].partitioning, 1); +} + +TEST_F(CpuidX86Test, HSWCache) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}}, + {{0x00000004, 0}, Leaf{0x1C004121, 0x01C0003F, 0x0000003F, 0x00000000}}, + {{0x00000004, 1}, Leaf{0x1C004122, 0x01C0003F, 0x0000003F, 0x00000000}}, + {{0x00000004, 2}, Leaf{0x1C004143, 0x01C0003F, 0x000001FF, 0x00000000}}, + {{0x00000004, 3}, Leaf{0x1C03C163, 0x02C0003F, 0x00001FFF, 0x00000006}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x80000008, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000001, 0}, Leaf{0x00000000, 0x00000000, 0x00000121, 0x2C100000}}, + {{0x80000002, 0}, Leaf{0x65746E49, 0x2952286C, 0x726F4320, 0x4D542865}}, + {{0x80000003, 0}, Leaf{0x37692029, 0x3035362D, 0x43205530, 0x40205550}}, + }); + const auto info = GetX86CacheInfo(); + EXPECT_EQ(info.size, 4); + EXPECT_EQ(info.levels[0].level, 1); + EXPECT_EQ(info.levels[0].cache_type, 1); + EXPECT_EQ(info.levels[0].cache_size, 32 * KiB); + EXPECT_EQ(info.levels[0].ways, 8); + EXPECT_EQ(info.levels[0].line_size, 64); + EXPECT_EQ(info.levels[0].tlb_entries, 64); + EXPECT_EQ(info.levels[0].partitioning, 1); + + EXPECT_EQ(info.levels[1].level, 1); + EXPECT_EQ(info.levels[1].cache_type, 2); + EXPECT_EQ(info.levels[1].cache_size, 32 * KiB); + EXPECT_EQ(info.levels[1].ways, 8); + EXPECT_EQ(info.levels[1].line_size, 64); + EXPECT_EQ(info.levels[1].tlb_entries, 64); + EXPECT_EQ(info.levels[1].partitioning, 1); + + EXPECT_EQ(info.levels[2].level, 2); + EXPECT_EQ(info.levels[2].cache_type, 3); + EXPECT_EQ(info.levels[2].cache_size, 256 * KiB); + EXPECT_EQ(info.levels[2].ways, 8); + EXPECT_EQ(info.levels[2].line_size, 64); + EXPECT_EQ(info.levels[2].tlb_entries, 512); + EXPECT_EQ(info.levels[2].partitioning, 1); + + EXPECT_EQ(info.levels[3].level, 3); + EXPECT_EQ(info.levels[3].cache_type, 3); + EXPECT_EQ(info.levels[3].cache_size, 6 * MiB); + EXPECT_EQ(info.levels[3].ways, 12); + EXPECT_EQ(info.levels[3].line_size, 64); + EXPECT_EQ(info.levels[3].tlb_entries, 8192); + EXPECT_EQ(info.levels[3].partitioning, 1); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0200F30_K11_Griffin_CPUID.txt +TEST_F(CpuidX86Test, AMD_K11_GRIFFIN) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000001, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00200F30, 0x00020800, 0x00002001, 0x178BFBFF}}, + {{0x80000000, 0}, Leaf{0x8000001A, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00200F30, 0x20000000, 0x0000131F, 0xEBD3FBFF}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x11); + EXPECT_EQ(info.model, 0x03); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_K11); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0300F10_K12_Llano_CPUID.txt +TEST_F(CpuidX86Test, AMD_K12_LLANO) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000006, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00300F10, 0x00040800, 0x00802009, 0x178BFBFF}}, + {{0x80000000, 0}, Leaf{0x8000001B, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00300F10, 0x20002B31, 0x000037FF, 0xEFD3FBFF}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x12); + EXPECT_EQ(info.model, 0x01); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_K12); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0500F01_K14_Bobcat_CPUID.txt +TEST_F(CpuidX86Test, AMD_K14_BOBCAT_AMD0500F01) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000006, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00500F01, 0x00020800, 0x00802209, 0x178BFBFF}}, + {{0x80000000, 0}, Leaf{0x8000001B, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00500F01, 0x00000000, 0x000035FF, 0x2FD3FBFF}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x14); + EXPECT_EQ(info.model, 0x00); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_BOBCAT); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0500F10_K14_Bobcat_CPUID.txt +TEST_F(CpuidX86Test, AMD_K14_BOBCAT_AMD0500F10) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000006, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00500F10, 0x00020800, 0x00802209, 0x178BFBFF}}, + {{0x00000002, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x00000003, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x00000005, 0}, Leaf{0x00000040, 0x00000040, 0x00000003, 0x00000000}}, + {{0x00000006, 0}, Leaf{0x00000000, 0x00000000, 0x00000001, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001B, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00500F10, 0x00001242, 0x000035FF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x35332D45, 0x72502030, 0x7365636F}}, + {{0x80000003, 0}, Leaf{0x00726F73, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000004, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000005, 0}, Leaf{0xFF08FF08, 0xFF280000, 0x20080140, 0x20020140}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x14); + EXPECT_EQ(info.model, 0x01); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_BOBCAT); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0500F20_K14_Bobcat_CPUID.txt +TEST_F(CpuidX86Test, AMD_K14_BOBCAT_AMD0500F20) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000006, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00500F20, 0x00020800, 0x00802209, 0x178BFBFF}}, + {{0x80000000, 0}, Leaf{0x8000001B, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00500F20, 0x000012E9, 0x000035FF, 0x2FD3FBFF}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x14); + EXPECT_EQ(info.model, 0x02); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_BOBCAT); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0670F00_K15_StoneyRidge_CPUID.txt +TEST_F(CpuidX86Test, AMD_K15_EXCAVATOR_STONEY_RIDGE) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00670F00, 0x00020800, 0x7ED8320B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x000001A9, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00670F00, 0x00000000, 0x2FABBFFF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x392D3941, 0x20303134, 0x45444152}}, + {{0x80000003, 0}, Leaf{0x52204E4F, 0x35202C35, 0x4D4F4320, 0x45545550}}, + {{0x80000004, 0}, Leaf{0x524F4320, 0x32205345, 0x47332B43, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x15); + EXPECT_EQ(info.model, 0x70); + EXPECT_EQ(GetX86Microarchitecture(&info), + X86Microarchitecture::AMD_EXCAVATOR); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD A9-9410 RADEON R5, 5 COMPUTE CORES 2C+3G "); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0600F20_K15_AbuDhabi_CPUID0.txt +TEST_F(CpuidX86Test, AMD_K15_PILEDRIVER_ABU_DHABI) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00600F20, 0x00100800, 0x3E98320B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00000008, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00600F20, 0x30000000, 0x01EBBFFF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x6574704F, 0x286E6F72, 0x20296D74}}, + {{0x80000003, 0}, Leaf{0x636F7250, 0x6F737365, 0x33362072, 0x20203637}}, + {{0x80000004, 0}, Leaf{0x20202020, 0x20202020, 0x20202020, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x15); + EXPECT_EQ(info.model, 0x02); + EXPECT_EQ(GetX86Microarchitecture(&info), + X86Microarchitecture::AMD_PILEDRIVER); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD Opteron(tm) Processor 6376 "); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0600F20_K15_AbuDhabi_CPUID0.txt +TEST_F(CpuidX86Test, AMD_K15_PILEDRIVER_ABU_DHABI_CACHE_INFO) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00600F20, 0x00100800, 0x3E98320B, 0x178BFBFF}}, + {{0x80000000, 0}, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00600F20, 0x30000000, 0x01EBBFFF, 0x2FD3FBFF}}, + {{0x8000001D, 0}, Leaf{0x00000121, 0x00C0003F, 0x0000003F, 0x00000000}}, + {{0x8000001D, 1}, Leaf{0x00004122, 0x0040003F, 0x000001FF, 0x00000000}}, + {{0x8000001D, 2}, Leaf{0x00004143, 0x03C0003F, 0x000007FF, 0x00000001}}, + {{0x8000001D, 3}, Leaf{0x0001C163, 0x0BC0003F, 0x000007FF, 0x00000001}}, + }); + const auto info = GetX86CacheInfo(); + + EXPECT_EQ(info.size, 4); + EXPECT_EQ(info.levels[0].level, 1); + EXPECT_EQ(info.levels[0].cache_type, 1); + EXPECT_EQ(info.levels[0].cache_size, 16 * KiB); + EXPECT_EQ(info.levels[0].ways, 4); + EXPECT_EQ(info.levels[0].line_size, 64); + EXPECT_EQ(info.levels[0].tlb_entries, 64); + EXPECT_EQ(info.levels[0].partitioning, 1); + + EXPECT_EQ(info.levels[1].level, 1); + EXPECT_EQ(info.levels[1].cache_type, 2); + EXPECT_EQ(info.levels[1].cache_size, 64 * KiB); + EXPECT_EQ(info.levels[1].ways, 2); + EXPECT_EQ(info.levels[1].line_size, 64); + EXPECT_EQ(info.levels[1].tlb_entries, 512); + EXPECT_EQ(info.levels[1].partitioning, 1); + + EXPECT_EQ(info.levels[2].level, 2); + EXPECT_EQ(info.levels[2].cache_type, 3); + EXPECT_EQ(info.levels[2].cache_size, 2 * MiB); + EXPECT_EQ(info.levels[2].ways, 16); + EXPECT_EQ(info.levels[2].line_size, 64); + EXPECT_EQ(info.levels[2].tlb_entries, 2048); + EXPECT_EQ(info.levels[2].partitioning, 1); + + EXPECT_EQ(info.levels[3].level, 3); + EXPECT_EQ(info.levels[3].cache_type, 3); + EXPECT_EQ(info.levels[3].cache_size, 6 * MiB); + EXPECT_EQ(info.levels[3].ways, 48); + EXPECT_EQ(info.levels[3].line_size, 64); + EXPECT_EQ(info.levels[3].tlb_entries, 2048); + EXPECT_EQ(info.levels[3].partitioning, 1); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0600F12_K15_Interlagos_CPUID3.txt +TEST_F(CpuidX86Test, AMD_K15_BULLDOZER_INTERLAGOS) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00600F12, 0x000C0800, 0x1E98220B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00600F12, 0x30000000, 0x01C9BFFF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x6574704F, 0x286E6F72, 0x20294D54}}, + {{0x80000003, 0}, Leaf{0x636F7250, 0x6F737365, 0x32362072, 0x20203833}}, + {{0x80000004, 0}, Leaf{0x20202020, 0x20202020, 0x20202020, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x15); + EXPECT_EQ(info.model, 0x01); + EXPECT_EQ(GetX86Microarchitecture(&info), + X86Microarchitecture::AMD_BULLDOZER); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD Opteron(TM) Processor 6238 "); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD0630F81_K15_Godavari_CPUID.txt +TEST_F(CpuidX86Test, AMD_K15_STREAMROLLER_GODAVARI) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00630F81, 0x00040800, 0x3E98320B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00630F81, 0x10000000, 0x0FEBBFFF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x372D3841, 0x4B303736, 0x64615220}}, + {{0x80000003, 0}, Leaf{0x206E6F65, 0x202C3752, 0x43203031, 0x75706D6F}}, + {{0x80000004, 0}, Leaf{0x43206574, 0x7365726F, 0x2B433420, 0x00204736}}, + {{0x80000005, 0}, Leaf{0xFF40FF18, 0xFF40FF30, 0x10040140, 0x60030140}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x15); + EXPECT_EQ(info.model, 0x38); + EXPECT_EQ(info.stepping, 0x01); + EXPECT_EQ(GetX86Microarchitecture(&info), + X86Microarchitecture::AMD_STREAMROLLER); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD A8-7670K Radeon R7, 10 Compute Cores 4C+6G "); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0700F01_K16_Kabini_CPUID.txt +TEST_F(CpuidX86Test, AMD_K16_JAGUAR_KABINI) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00700F01, 0x00040800, 0x3ED8220B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00000008, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00700F01, 0x00000000, 0x154037FF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x352D3441, 0x20303030, 0x20555041}}, + {{0x80000003, 0}, Leaf{0x68746977, 0x64615220, 0x286E6F65, 0x20294D54}}, + {{0x80000004, 0}, Leaf{0x47204448, 0x68706172, 0x20736369, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x16); + EXPECT_EQ(info.model, 0x00); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_JAGUAR); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD A4-5000 APU with Radeon(TM) HD Graphics "); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0730F01_K16_Beema_CPUID2.txt +TEST_F(CpuidX86Test, AMD_K16_PUMA_BEEMA) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00730F01, 0x00040800, 0x7ED8220B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00000008, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00730F01, 0x00000000, 0x1D4037FF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x362D3641, 0x20303133, 0x20555041}}, + {{0x80000003, 0}, Leaf{0x68746977, 0x444D4120, 0x64615220, 0x206E6F65}}, + {{0x80000004, 0}, Leaf{0x47203452, 0x68706172, 0x20736369, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x16); + EXPECT_EQ(info.model, 0x30); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_PUMA); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD A6-6310 APU with AMD Radeon R4 Graphics "); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0820F01_K17_Dali_CPUID.txt +TEST_F(CpuidX86Test, AMD_K17_ZEN_DALI) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00820F01, 0x00020800, 0x7ED8320B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x209C01A9, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001F, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00820F01, 0x00000000, 0x35C233FF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x30323033, 0x69772065, 0x52206874}}, + {{0x80000003, 0}, Leaf{0x6F656461, 0x7247206E, 0x69687061, 0x20207363}}, + {{0x80000004, 0}, Leaf{0x20202020, 0x20202020, 0x20202020, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x17); + EXPECT_EQ(info.model, 0x20); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_ZEN); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD 3020e with Radeon Graphics "); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0800F82_K17_ZenP_CPUID.txt +TEST_F(CpuidX86Test, AMD_K17_ZEN_PLUS_PINNACLE_RIDGE) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00800F82, 0x00100800, 0x7ED8320B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x209C01A9, 0x00000000, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001F, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00800F82, 0x20000000, 0x35C233FF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x657A7952, 0x2037206E, 0x30303732}}, + {{0x80000003, 0}, Leaf{0x69452058, 0x2D746867, 0x65726F43, 0x6F725020}}, + {{0x80000004, 0}, Leaf{0x73736563, 0x2020726F, 0x20202020, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x17); + EXPECT_EQ(info.model, 0x08); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_ZEN_PLUS); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD Ryzen 7 2700X Eight-Core Processor "); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0840F70_K17_CPUID.txt +TEST_F(CpuidX86Test, AMD_K17_ZEN2_XBOX_SERIES_X) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000010, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00840F70, 0x00100800, 0x7ED8320B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x219C91A9, 0x00400004, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x80000020, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00840F70, 0x00000000, 0xF5C2B7FF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x30303734, 0x2D382053, 0x65726F43}}, + {{0x80000003, 0}, Leaf{0x6F725020, 0x73736563, 0x4420726F, 0x746B7365}}, + {{0x80000004, 0}, Leaf{0x4B20706F, 0x00007469, 0x00000000, 0x00000000}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x17); + EXPECT_EQ(info.model, 0x47); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_ZEN2); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD 4700S 8-Core Processor Desktop Kit"); +} + +// http://users.atw.hu/instlatx64/HygonGenuine/HygonGenuine0900F02_Hygon_CPUID3.txt +TEST_F(CpuidX86Test, AMD_K18_ZEN_DHYANA) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x6F677948, 0x656E6975, 0x6E65476E}}, + {{0x00000001, 0}, Leaf{0x00900F02, 0x00100800, 0x74D83209, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x009C01A9, 0x0040068C, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x8000001F, 0x6F677948, 0x656E6975, 0x6E65476E}}, + {{0x80000001, 0}, Leaf{0x00900F02, 0x60000000, 0x35C233FF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x6F677948, 0x3843206E, 0x31332036, 0x20203538}}, + {{0x80000003, 0}, Leaf{0x6F632D38, 0x50206572, 0x65636F72, 0x726F7373}}, + {{0x80000004, 0}, Leaf{0x20202020, 0x20202020, 0x20202020, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "HygonGenuine"); + EXPECT_EQ(info.family, 0x18); + EXPECT_EQ(info.model, 0x00); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_ZEN); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "Hygon C86 3185 8-core Processor "); +} + +// http://users.atw.hu/instlatx64/HygonGenuine/HygonGenuine0900F02_Hygon_CPUID.txt +TEST_F(CpuidX86Test, AMD_K18_ZEN_DHYANA_CACHE_INFO) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x6F677948, 0x656E6975, 0x6E65476E}}, + {{0x00000001, 0}, Leaf{0x00900F02, 0x00100800, 0x74D83209, 0x178BFBFF}}, + {{0x80000000, 0}, Leaf{0x8000001F, 0x6F677948, 0x656E6975, 0x6E65476E}}, + {{0x80000001, 0}, Leaf{0x00900F02, 0x60000000, 0x35C233FF, 0x2FD3FBFF}}, + {{0x8000001D, 0}, Leaf{0x00004121, 0x01C0003F, 0x0000003F, 0x00000000}}, + {{0x8000001D, 1}, Leaf{0x00004122, 0x00C0003F, 0x000000FF, 0x00000000}}, + {{0x8000001D, 2}, Leaf{0x00004143, 0x01C0003F, 0x000003FF, 0x00000002}}, + {{0x8000001D, 3}, Leaf{0x0001C163, 0x03C0003F, 0x00001FFF, 0x00000001}}, + }); + const auto info = GetX86CacheInfo(); + + EXPECT_EQ(info.size, 4); + EXPECT_EQ(info.levels[0].level, 1); + EXPECT_EQ(info.levels[0].cache_type, 1); + EXPECT_EQ(info.levels[0].cache_size, 32 * KiB); + EXPECT_EQ(info.levels[0].ways, 8); + EXPECT_EQ(info.levels[0].line_size, 64); + EXPECT_EQ(info.levels[0].tlb_entries, 64); + EXPECT_EQ(info.levels[0].partitioning, 1); + + EXPECT_EQ(info.levels[1].level, 1); + EXPECT_EQ(info.levels[1].cache_type, 2); + EXPECT_EQ(info.levels[1].cache_size, 64 * KiB); + EXPECT_EQ(info.levels[1].ways, 4); + EXPECT_EQ(info.levels[1].line_size, 64); + EXPECT_EQ(info.levels[1].tlb_entries, 256); + EXPECT_EQ(info.levels[1].partitioning, 1); + + EXPECT_EQ(info.levels[2].level, 2); + EXPECT_EQ(info.levels[2].cache_type, 3); + EXPECT_EQ(info.levels[2].cache_size, 512 * KiB); + EXPECT_EQ(info.levels[2].ways, 8); + EXPECT_EQ(info.levels[2].line_size, 64); + EXPECT_EQ(info.levels[2].tlb_entries, 1024); + EXPECT_EQ(info.levels[2].partitioning, 1); + + EXPECT_EQ(info.levels[3].level, 3); + EXPECT_EQ(info.levels[3].cache_type, 3); + EXPECT_EQ(info.levels[3].cache_size, 8 * MiB); + EXPECT_EQ(info.levels[3].ways, 16); + EXPECT_EQ(info.levels[3].line_size, 64); + EXPECT_EQ(info.levels[3].tlb_entries, 8192); + EXPECT_EQ(info.levels[3].partitioning, 1); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD/AuthenticAMD0A20F10_K19_Vermeer2_CPUID.txt +TEST_F(CpuidX86Test, AMD_K19_ZEN3_VERMEER) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000010, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x00000001, 0}, Leaf{0x00A20F10, 0x01180800, 0x7ED8320B, 0x178BFBFF}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x219C97A9, 0x0040068C, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x80000023, 0x68747541, 0x444D4163, 0x69746E65}}, + {{0x80000001, 0}, Leaf{0x00A20F10, 0x20000000, 0x75C237FF, 0x2FD3FBFF}}, + {{0x80000002, 0}, Leaf{0x20444D41, 0x657A7952, 0x2039206E, 0x30303935}}, + {{0x80000003, 0}, Leaf{0x32312058, 0x726F432D, 0x72502065, 0x7365636F}}, + {{0x80000004, 0}, Leaf{0x20726F73, 0x20202020, 0x20202020, 0x00202020}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x19); + EXPECT_EQ(info.model, 0x21); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::AMD_ZEN3); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD Ryzen 9 5900X 12-Core Processor "); +} + +// https://github.com/InstLatx64/InstLatx64/blob/master/GenuineIntel/GenuineIntel00106A1_Nehalem_CPUID.txt +TEST_F(CpuidX86Test, Nehalem) { + // Pre AVX cpus don't have xsave + cpu().SetOsBackupsExtendedRegisters(false); +#if defined(CPU_FEATURES_OS_WINDOWS) + cpu().SetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); + cpu().SetWindowsIsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); + cpu().SetWindowsIsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE); +#elif defined(CPU_FEATURES_OS_MACOS) + cpu().SetDarwinSysCtlByName("hw.optional.sse"); + cpu().SetDarwinSysCtlByName("hw.optional.sse2"); + cpu().SetDarwinSysCtlByName("hw.optional.sse3"); + cpu().SetDarwinSysCtlByName("hw.optional.supplementalsse3"); + cpu().SetDarwinSysCtlByName("hw.optional.sse4_1"); + cpu().SetDarwinSysCtlByName("hw.optional.sse4_2"); +#elif defined(CPU_FEATURES_OS_FREEBSD) + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/var/run/dmesg.boot", R"( + ---<>--- +Copyright (c) 1992-2020 The FreeBSD Project. +FreeBSD is a registered trademark of The FreeBSD Foundation. + Features=0x1783fbff + Features2=0x5eda2203 +real memory = 2147418112 (2047 MB) +)"); +#elif defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(processor : +flags : fpu mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 +)"); +#endif + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000B, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x000106A2, 0x00100800, 0x00BCE3BD, 0xBFEBFBFF}}, + {{0x00000002, 0}, Leaf{0x55035A01, 0x00F0B0E3, 0x00000000, 0x09CA212C}}, + {{0x00000003, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x00000004, 0}, Leaf{0x1C004121, 0x01C0003F, 0x0000003F, 0x00000000}}, + {{0x00000004, 0}, Leaf{0x1C004122, 0x00C0003F, 0x0000007F, 0x00000000}}, + {{0x00000004, 0}, Leaf{0x1C004143, 0x01C0003F, 0x000001FF, 0x00000000}}, + {{0x00000004, 0}, Leaf{0x1C03C163, 0x03C0003F, 0x00000FFF, 0x00000002}}, + {{0x00000005, 0}, Leaf{0x00000040, 0x00000040, 0x00000003, 0x00021120}}, + {{0x00000006, 0}, Leaf{0x00000001, 0x00000002, 0x00000001, 0x00000000}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x00000008, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x00000009, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x0000000A, 0}, Leaf{0x07300403, 0x00000000, 0x00000000, 0x00000603}}, + {{0x0000000B, 0}, Leaf{0x00000001, 0x00000001, 0x00000100, 0x00000000}}, + {{0x0000000B, 0}, Leaf{0x00000004, 0x00000002, 0x00000201, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x80000008, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000001, 0}, Leaf{0x00000000, 0x00000000, 0x00000001, 0x28100000}}, + {{0x80000002, 0}, Leaf{0x756E6547, 0x20656E69, 0x65746E49, 0x2952286C}}, + {{0x80000003, 0}, Leaf{0x55504320, 0x20202020, 0x20202020, 0x40202020}}, + {{0x80000004, 0}, Leaf{0x30303020, 0x20402030, 0x37382E31, 0x007A4847}}, + {{0x80000005, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000006, 0}, Leaf{0x00000000, 0x00000000, 0x01006040, 0x00000000}}, + {{0x80000007, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000100}}, + {{0x80000008, 0}, Leaf{0x00003028, 0x00000000, 0x00000000, 0x00000000}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x1A); + EXPECT_EQ(info.stepping, 0x02); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::INTEL_NHM); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "Genuine Intel(R) CPU @ 0000 @ 1.87GHz"); + + EXPECT_TRUE(info.features.sse); + EXPECT_TRUE(info.features.sse2); + EXPECT_TRUE(info.features.sse3); +#if !defined(CPU_FEATURES_OS_WINDOWS) + // Currently disabled on Windows as IsProcessorFeaturePresent do not support + // feature detection > sse3. + EXPECT_TRUE(info.features.ssse3); + EXPECT_TRUE(info.features.sse4_1); + EXPECT_TRUE(info.features.sse4_2); +#endif // !defined(CPU_FEATURES_OS_WINDOWS) +} + +// https://github.com/InstLatx64/InstLatx64/blob/master/GenuineIntel/GenuineIntel0030673_Silvermont3_CPUID.txt +TEST_F(CpuidX86Test, Atom) { + // Pre AVX cpus don't have xsave + cpu().SetOsBackupsExtendedRegisters(false); +#if defined(CPU_FEATURES_OS_WINDOWS) + cpu().SetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); + cpu().SetWindowsIsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); + cpu().SetWindowsIsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE); +#elif defined(CPU_FEATURES_OS_MACOS) + cpu().SetDarwinSysCtlByName("hw.optional.sse"); + cpu().SetDarwinSysCtlByName("hw.optional.sse2"); + cpu().SetDarwinSysCtlByName("hw.optional.sse3"); + cpu().SetDarwinSysCtlByName("hw.optional.supplementalsse3"); + cpu().SetDarwinSysCtlByName("hw.optional.sse4_1"); + cpu().SetDarwinSysCtlByName("hw.optional.sse4_2"); +#elif defined(CPU_FEATURES_OS_FREEBSD) + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/var/run/dmesg.boot", R"( + ---<>--- +Copyright (c) 1992-2020 The FreeBSD Project. +FreeBSD is a registered trademark of The FreeBSD Foundation. + Features=0x1783fbff + Features2=0x5eda2203 +real memory = 2147418112 (2047 MB) +)"); +#elif defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"( +flags : fpu mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 +)"); +#endif + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000B, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x00030673, 0x00100800, 0x41D8E3BF, 0xBFEBFBFF}}, + {{0x00000002, 0}, Leaf{0x61B3A001, 0x0000FFC2, 0x00000000, 0x00000000}}, + {{0x00000003, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x00000004, 0}, Leaf{0x1C000121, 0x0140003F, 0x0000003F, 0x00000001}}, + {{0x00000004, 1}, Leaf{0x1C000122, 0x01C0003F, 0x0000003F, 0x00000001}}, + {{0x00000004, 2}, Leaf{0x1C00C143, 0x03C0003F, 0x000003FF, 0x00000001}}, + {{0x00000005, 0}, Leaf{0x00000040, 0x00000040, 0x00000003, 0x33000020}}, + {{0x00000006, 0}, Leaf{0x00000005, 0x00000002, 0x00000009, 0x00000000}}, + {{0x00000007, 0}, Leaf{0x00000000, 0x00002282, 0x00000000, 0x00000000}}, + {{0x00000008, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x00000009, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x0000000A, 0}, Leaf{0x07280203, 0x00000000, 0x00000000, 0x00004503}}, + {{0x0000000B, 0}, Leaf{0x00000001, 0x00000001, 0x00000100, 0x00000000}}, + {{0x0000000B, 1}, Leaf{0x00000004, 0x00000004, 0x00000201, 0x00000000}}, + {{0x80000000, 0}, Leaf{0x80000008, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000001, 0}, Leaf{0x00000000, 0x00000000, 0x00000101, 0x28100000}}, + {{0x80000002, 0}, Leaf{0x20202020, 0x6E492020, 0x286C6574, 0x43202952}}, + {{0x80000003, 0}, Leaf{0x72656C65, 0x52286E6F, 0x50432029, 0x4A202055}}, + {{0x80000004, 0}, Leaf{0x30303931, 0x20402020, 0x39392E31, 0x007A4847}}, + {{0x80000005, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {{0x80000006, 0}, Leaf{0x00000000, 0x00000000, 0x04008040, 0x00000000}}, + {{0x80000007, 0}, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000100}}, + {{0x80000008, 0}, Leaf{0x00003024, 0x00000000, 0x00000000, 0x00000000}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x37); + EXPECT_EQ(info.stepping, 0x03); + EXPECT_EQ(GetX86Microarchitecture(&info), + X86Microarchitecture::INTEL_ATOM_SMT); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, " Intel(R) Celeron(R) CPU J1900 @ 1.99GHz"); + + EXPECT_TRUE(info.features.sse); + EXPECT_TRUE(info.features.sse2); + EXPECT_TRUE(info.features.sse3); +#if !defined(CPU_FEATURES_OS_WINDOWS) + // Currently disabled on Windows as IsProcessorFeaturePresent do not support + // feature detection > sse3. + EXPECT_TRUE(info.features.ssse3); + EXPECT_TRUE(info.features.sse4_1); + EXPECT_TRUE(info.features.sse4_2); +#endif // !defined(CPU_FEATURES_OS_WINDOWS) +} + +// https://www.felixcloutier.com/x86/cpuid#example-3-1--example-of-cache-and-tlb-interpretation +TEST_F(CpuidX86Test, P4_CacheInfo) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000002, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x00000F0A, 0x00010808, 0x00000000, 0x3FEBFBFF}}, + {{0x00000002, 0}, Leaf{0x665B5001, 0x00000000, 0x00000000, 0x007A7000}}, + }); + + const auto info = GetX86CacheInfo(); + EXPECT_EQ(info.size, 5); + + EXPECT_EQ(info.levels[0].level, UNDEF); + EXPECT_EQ(info.levels[0].cache_type, CPU_FEATURE_CACHE_TLB); + EXPECT_EQ(info.levels[0].cache_size, 4 * KiB); + EXPECT_EQ(info.levels[0].ways, UNDEF); + EXPECT_EQ(info.levels[0].line_size, UNDEF); + EXPECT_EQ(info.levels[0].tlb_entries, 64); + EXPECT_EQ(info.levels[0].partitioning, 0); + + EXPECT_EQ(info.levels[1].level, UNDEF); + EXPECT_EQ(info.levels[1].cache_type, CPU_FEATURE_CACHE_TLB); + EXPECT_EQ(info.levels[1].cache_size, 4 * KiB); + EXPECT_EQ(info.levels[1].ways, UNDEF); + EXPECT_EQ(info.levels[1].line_size, UNDEF); + EXPECT_EQ(info.levels[1].tlb_entries, 64); + EXPECT_EQ(info.levels[1].partitioning, 0); + + EXPECT_EQ(info.levels[2].level, 1); + EXPECT_EQ(info.levels[2].cache_type, CPU_FEATURE_CACHE_DATA); + EXPECT_EQ(info.levels[2].cache_size, 8 * KiB); + EXPECT_EQ(info.levels[2].ways, 4); + EXPECT_EQ(info.levels[2].line_size, 64); + EXPECT_EQ(info.levels[2].tlb_entries, UNDEF); + EXPECT_EQ(info.levels[2].partitioning, 0); + + EXPECT_EQ(info.levels[3].level, 1); + EXPECT_EQ(info.levels[3].cache_type, CPU_FEATURE_CACHE_INSTRUCTION); + EXPECT_EQ(info.levels[3].cache_size, 12 * KiB); + EXPECT_EQ(info.levels[3].ways, 8); + EXPECT_EQ(info.levels[3].line_size, UNDEF); + EXPECT_EQ(info.levels[3].tlb_entries, UNDEF); + EXPECT_EQ(info.levels[3].partitioning, 0); + + EXPECT_EQ(info.levels[4].level, 2); + EXPECT_EQ(info.levels[4].cache_type, CPU_FEATURE_CACHE_DATA); + EXPECT_EQ(info.levels[4].cache_size, 256 * KiB); + EXPECT_EQ(info.levels[4].ways, 8); + EXPECT_EQ(info.levels[4].line_size, 64); + EXPECT_EQ(info.levels[4].tlb_entries, UNDEF); + EXPECT_EQ(info.levels[4].partitioning, 2); +} + +// https://github.com/InstLatx64/InstLatx64/blob/master/GenuineIntel/GenuineIntel0000673_P3_KatmaiDP_CPUID.txt +TEST_F(CpuidX86Test, P3) { + // Pre AVX cpus don't have xsave + cpu().SetOsBackupsExtendedRegisters(false); +#if defined(CPU_FEATURES_OS_WINDOWS) + cpu().SetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); +#elif defined(CPU_FEATURES_OS_MACOS) + cpu().SetDarwinSysCtlByName("hw.optional.sse"); +#elif defined(CPU_FEATURES_OS_FREEBSD) + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/var/run/dmesg.boot", R"( + ---<>--- +Copyright (c) 1992-2020 The FreeBSD Project. +FreeBSD is a registered trademark of The FreeBSD Foundation. + Features=0x1783fbff +real memory = 2147418112 (2047 MB) +)"); +#elif defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"( +flags : fpu mmx sse +)"); +#endif + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000003, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x00000673, 0x00000000, 0x00000000, 0x0387FBFF}}, + {{0x00000002, 0}, Leaf{0x03020101, 0x00000000, 0x00000000, 0x0C040843}}, + {{0x00000003, 0}, Leaf{0x00000000, 0x00000000, 0x4CECC782, 0x00006778}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x07); + EXPECT_EQ(info.stepping, 0x03); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::X86_UNKNOWN); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, ""); + + EXPECT_TRUE(info.features.mmx); + EXPECT_TRUE(info.features.sse); + EXPECT_FALSE(info.features.sse2); + EXPECT_FALSE(info.features.sse3); +#if !defined(CPU_FEATURES_OS_WINDOWS) + // Currently disabled on Windows as IsProcessorFeaturePresent do not support + // feature detection > sse3. + EXPECT_FALSE(info.features.ssse3); + EXPECT_FALSE(info.features.sse4_1); + EXPECT_FALSE(info.features.sse4_2); +#endif // !defined(CPU_FEATURES_OS_WINDOWS) +} + +// https://github.com/InstLatx64/InstLatx64/blob/master/GenuineIntel/GenuineIntel0000480_486_CPUID.txt +TEST_F(CpuidX86Test, INTEL_80486) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000001, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x00000480, 0x00000000, 0x00000000, 0x00000003}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x04); + EXPECT_EQ(info.model, 0x08); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::INTEL_80486); +} + +// https://github.com/InstLatx64/InstLatx64/blob/master/GenuineIntel/GenuineIntel0000526_P54C_CPUID.txt +TEST_F(CpuidX86Test, INTEL_P54C) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000001, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x00000525, 0x00000000, 0x00000000, 0x000001BF}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x05); + EXPECT_EQ(info.model, 0x02); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::INTEL_P5); +} + +// https://github.com/InstLatx64/InstLatx64/blob/master/GenuineIntel/GenuineIntel0000590_Lakemont_CPUID2.txt +TEST_F(CpuidX86Test, INTEL_LAKEMONT) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000002, 0x756E6547, 0x6c65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x00000590, 0x00000000, 0x00010200, 0x8000237B}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x05); + EXPECT_EQ(info.model, 0x09); + EXPECT_EQ(GetX86Microarchitecture(&info), + X86Microarchitecture::INTEL_LAKEMONT); +} + +// https://github.com/InstLatx64/InstLatx64/blob/master/GenuineIntel/GenuineIntel0050670_KnightsLanding_CPUID.txt +TEST_F(CpuidX86Test, INTEL_KNIGHTS_LANDING) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x00050670, 0x02FF0800, 0x7FF8F3BF, 0xBFEBFBFF}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x57); + EXPECT_EQ(GetX86Microarchitecture(&info), + X86Microarchitecture::INTEL_KNIGHTS_L); +} + +// TODO(user): test what happens when xsave/osxsave are not present. +// TODO(user): test what happens when xmm/ymm/zmm os support are not +// present. + +} // namespace +} // namespace cpu_features diff --git a/cpu_features/test/filesystem_for_testing.cc b/cpu_features/test/filesystem_for_testing.cc new file mode 100644 index 0000000..648a53e --- /dev/null +++ b/cpu_features/test/filesystem_for_testing.cc @@ -0,0 +1,103 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "filesystem_for_testing.h" + +#include +#include +#include +#include +#include + +namespace cpu_features { + +FakeFile::FakeFile(int file_descriptor, const char* content) + : file_descriptor_(file_descriptor), content_(content) {} + +FakeFile::~FakeFile() { assert(!opened_); } + +void FakeFile::Open() { + assert(!opened_); + opened_ = true; +} + +void FakeFile::Close() { + assert(opened_); + opened_ = false; +} + +int FakeFile::Read(int fd, void* buf, size_t count) { + assert(count < INT_MAX); + assert(fd == file_descriptor_); + const size_t remainder = content_.size() - head_index_; + const size_t read = count > remainder ? remainder : count; + memcpy(buf, content_.data() + head_index_, read); + head_index_ += read; + assert(read < INT_MAX); + return (int)read; +} + +void FakeFilesystem::Reset() { files_.clear(); } + +FakeFile* FakeFilesystem::CreateFile(const std::string& filename, + const char* content) { + auto& file = files_[filename]; + file = + std::unique_ptr(new FakeFile(next_file_descriptor_++, content)); + return file.get(); +} + +FakeFile* FakeFilesystem::FindFileOrNull(const std::string& filename) const { + const auto itr = files_.find(filename); + return itr == files_.end() ? nullptr : itr->second.get(); +} + +FakeFile* FakeFilesystem::FindFileOrDie(const int file_descriptor) const { + for (const auto& filename_file_pair : files_) { + FakeFile* const file_ptr = filename_file_pair.second.get(); + if (file_ptr->GetFileDescriptor() == file_descriptor) { + return file_ptr; + } + } + assert(false); + return nullptr; +} + +static FakeFilesystem* kFilesystem = new FakeFilesystem(); + +FakeFilesystem& GetEmptyFilesystem() { + kFilesystem->Reset(); + return *kFilesystem; +} + +extern "C" int CpuFeatures_OpenFile(const char* filename) { + auto* const file = kFilesystem->FindFileOrNull(filename); + if (file) { + file->Open(); + return file->GetFileDescriptor(); + } + return -1; +} + +extern "C" void CpuFeatures_CloseFile(int file_descriptor) { + kFilesystem->FindFileOrDie(file_descriptor)->Close(); +} + +extern "C" int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + return kFilesystem->FindFileOrDie(file_descriptor) + ->Read(file_descriptor, buffer, buffer_size); +} + +} // namespace cpu_features diff --git a/cpu_features/test/filesystem_for_testing.h b/cpu_features/test/filesystem_for_testing.h new file mode 100644 index 0000000..ef717fd --- /dev/null +++ b/cpu_features/test/filesystem_for_testing.h @@ -0,0 +1,61 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements a fake filesystem, useful for tests. +#ifndef CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_ +#define CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_ + +#include +#include +#include + +#include "internal/filesystem.h" + +namespace cpu_features { + +class FakeFile { + public: + explicit FakeFile(int file_descriptor, const char* content); + ~FakeFile(); + + void Open(); + void Close(); + int Read(int fd, void* buf, size_t count); + + int GetFileDescriptor() const { return file_descriptor_; } + + private: + const int file_descriptor_; + const std::string content_; + bool opened_ = false; + size_t head_index_ = 0; +}; + +class FakeFilesystem { + public: + void Reset(); + FakeFile* CreateFile(const std::string& filename, const char* content); + FakeFile* FindFileOrDie(const int file_descriptor) const; + FakeFile* FindFileOrNull(const std::string& filename) const; + + private: + int next_file_descriptor_ = 0; + std::unordered_map> files_; +}; + +FakeFilesystem& GetEmptyFilesystem(); + +} // namespace cpu_features + +#endif // CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_ diff --git a/cpu_features/test/hwcaps_for_testing.cc b/cpu_features/test/hwcaps_for_testing.cc new file mode 100644 index 0000000..fc0013d --- /dev/null +++ b/cpu_features/test/hwcaps_for_testing.cc @@ -0,0 +1,52 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "hwcaps_for_testing.h" + +#include + +#include "internal/string_view.h" + +namespace cpu_features { + +namespace { +static auto* const g_hardware_capabilities = new HardwareCapabilities(); +static const char* g_platform_pointer = nullptr; +static const char* g_base_platform_pointer = nullptr; +} // namespace + +void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2) { + g_hardware_capabilities->hwcaps = hwcaps; + g_hardware_capabilities->hwcaps2 = hwcaps2; +} +void SetPlatformPointer(const char* string) { g_platform_pointer = string; } +void SetBasePlatformPointer(const char* string) { + g_base_platform_pointer = string; +} + +void ResetHwcaps() { + SetHardwareCapabilities(0, 0); + SetPlatformPointer(nullptr); + SetBasePlatformPointer(nullptr); +} + +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) { + return *g_hardware_capabilities; +} +const char* CpuFeatures_GetPlatformPointer(void) { return g_platform_pointer; } +const char* CpuFeatures_GetBasePlatformPointer(void) { + return g_base_platform_pointer; +} + +} // namespace cpu_features diff --git a/cpu_features/test/hwcaps_for_testing.h b/cpu_features/test/hwcaps_for_testing.h new file mode 100644 index 0000000..2138bac --- /dev/null +++ b/cpu_features/test/hwcaps_for_testing.h @@ -0,0 +1,31 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_ +#define CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_ + +#include "internal/hwcaps.h" + +namespace cpu_features { + +void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2); +void SetPlatformPointer(const char* string); +void SetBasePlatformPointer(const char* string); + +// To be called before each test. +void ResetHwcaps(); + +} // namespace cpu_features + +#endif // CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_ diff --git a/cpu_features/test/stack_line_reader_test.cc b/cpu_features/test/stack_line_reader_test.cc new file mode 100644 index 0000000..9ac5388 --- /dev/null +++ b/cpu_features/test/stack_line_reader_test.cc @@ -0,0 +1,132 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/stack_line_reader.h" + +#include "filesystem_for_testing.h" +#include "gtest/gtest.h" + +namespace cpu_features { + +bool operator==(const StringView& a, const StringView& b) { + return CpuFeatures_StringView_IsEquals(a, b); +} + +namespace { + +std::string ToString(StringView view) { return {view.ptr, view.size}; } + +TEST(StackLineReaderTest, Empty) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", ""); + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("")); + } +} + +TEST(StackLineReaderTest, ManySmallLines) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", "a\nb\nc"); + + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("a")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("b")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("c")); + } +} + +TEST(StackLineReaderTest, TruncatedLine) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", R"(First +Second +More than 16 characters, this will be truncated. +last)"); + + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("First")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("Second")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_FALSE(result.full_line); + EXPECT_EQ(result.line, str("More than 16 cha")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("last")); + } +} + +TEST(StackLineReaderTest, TruncatedLines) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", R"(More than 16 characters +Another line that is too long)"); + + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_FALSE(result.full_line); + EXPECT_EQ(result.line, str("More than 16 cha")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_FALSE(result.full_line); + EXPECT_EQ(result.line, str("Another line tha")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("")); + } +} + +} // namespace +} // namespace cpu_features diff --git a/cpu_features/test/string_view_test.cc b/cpu_features/test/string_view_test.cc new file mode 100644 index 0000000..772ac3f --- /dev/null +++ b/cpu_features/test/string_view_test.cc @@ -0,0 +1,202 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/string_view.h" + +#include "gtest/gtest.h" + +namespace cpu_features { + +bool operator==(const StringView& a, const StringView& b) { + return CpuFeatures_StringView_IsEquals(a, b); +} + +namespace { + +TEST(StringViewTest, Empty) { + EXPECT_EQ(kEmptyStringView.ptr, nullptr); + EXPECT_EQ(kEmptyStringView.size, 0); +} + +TEST(StringViewTest, Build) { + const auto view = str("test"); + EXPECT_EQ(view.ptr[0], 't'); + EXPECT_EQ(view.size, 4); +} + +TEST(StringViewTest, CpuFeatures_StringView_IndexOfChar) { + // Found. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(str("test"), 'e'), 1); + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(str("test"), 't'), 0); + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(str("beef"), 'e'), 1); + // Not found. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(str("test"), 'z'), -1); + // Empty. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(kEmptyStringView, 'z'), -1); +} + +TEST(StringViewTest, CpuFeatures_StringView_IndexOf) { + // Found. + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("test"), str("es")), 1); + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("test"), str("test")), 0); + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("tesstest"), str("test")), 4); + // Not found. + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("test"), str("aa")), -1); + // Empty. + EXPECT_EQ(CpuFeatures_StringView_IndexOf(kEmptyStringView, str("aa")), -1); + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("aa"), kEmptyStringView), -1); +} + +TEST(StringViewTest, CpuFeatures_StringView_StartsWith) { + EXPECT_TRUE(CpuFeatures_StringView_StartsWith(str("test"), str("te"))); + EXPECT_TRUE(CpuFeatures_StringView_StartsWith(str("test"), str("test"))); + EXPECT_FALSE(CpuFeatures_StringView_StartsWith(str("test"), str("st"))); + EXPECT_FALSE(CpuFeatures_StringView_StartsWith(str("test"), str("est"))); + EXPECT_FALSE(CpuFeatures_StringView_StartsWith(str("test"), str(""))); + EXPECT_FALSE( + CpuFeatures_StringView_StartsWith(str("test"), kEmptyStringView)); + EXPECT_FALSE( + CpuFeatures_StringView_StartsWith(kEmptyStringView, str("test"))); +} + +TEST(StringViewTest, CpuFeatures_StringView_IsEquals) { + EXPECT_TRUE( + CpuFeatures_StringView_IsEquals(kEmptyStringView, kEmptyStringView)); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(kEmptyStringView, str(""))); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str(""), kEmptyStringView)); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str("test"), str("test"))); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str("a"), str("a"))); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), str("b"))); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("aa"), str("a"))); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), str("aa"))); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), kEmptyStringView)); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(kEmptyStringView, str("a"))); +} + +TEST(StringViewTest, CpuFeatures_StringView_PopFront) { + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 2), str("st")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 0), str("test")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 4), str("")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 100), str("")); +} + +TEST(StringViewTest, CpuFeatures_StringView_PopBack) { + EXPECT_EQ(CpuFeatures_StringView_PopBack(str("test"), 2), str("te")); + EXPECT_EQ(CpuFeatures_StringView_PopBack(str("test"), 0), str("test")); + EXPECT_EQ(CpuFeatures_StringView_PopBack(str("test"), 4), str("")); + EXPECT_EQ(CpuFeatures_StringView_PopBack(str("test"), 100), str("")); +} + +TEST(StringViewTest, CpuFeatures_StringView_KeepFront) { + EXPECT_EQ(CpuFeatures_StringView_KeepFront(str("test"), 2), str("te")); + EXPECT_EQ(CpuFeatures_StringView_KeepFront(str("test"), 0), str("")); + EXPECT_EQ(CpuFeatures_StringView_KeepFront(str("test"), 4), str("test")); + EXPECT_EQ(CpuFeatures_StringView_KeepFront(str("test"), 6), str("test")); +} + +TEST(StringViewTest, CpuFeatures_StringView_Front) { + EXPECT_EQ(CpuFeatures_StringView_Front(str("apple")), 'a'); + EXPECT_EQ(CpuFeatures_StringView_Front(str("a")), 'a'); +} + +TEST(StringViewTest, CpuFeatures_StringView_Back) { + EXPECT_EQ(CpuFeatures_StringView_Back(str("apple")), 'e'); + EXPECT_EQ(CpuFeatures_StringView_Back(str("a")), 'a'); +} + +TEST(StringViewTest, CpuFeatures_StringView_TrimWhitespace) { + EXPECT_EQ(CpuFeatures_StringView_TrimWhitespace(str(" first middle last ")), + str("first middle last")); + EXPECT_EQ(CpuFeatures_StringView_TrimWhitespace(str("first middle last ")), + str("first middle last")); + EXPECT_EQ(CpuFeatures_StringView_TrimWhitespace(str(" first middle last")), + str("first middle last")); + EXPECT_EQ(CpuFeatures_StringView_TrimWhitespace(str("first middle last")), + str("first middle last")); +} + +TEST(StringViewTest, CpuFeatures_StringView_ParsePositiveNumber) { + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("42")), 42); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2a")), 42); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2A")), 42); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2A2a")), 10794); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2a2A")), 10794); + + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("-10")), -1); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("-0x2A")), -1); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("abc")), -1); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("")), -1); +} + +TEST(StringViewTest, CpuFeatures_StringView_CopyString) { + char buf[4]; + buf[0] = 'X'; + + // Empty + CpuFeatures_StringView_CopyString(str(""), buf, sizeof(buf)); + EXPECT_STREQ(buf, ""); + + // Less + CpuFeatures_StringView_CopyString(str("a"), buf, sizeof(buf)); + EXPECT_STREQ(buf, "a"); + + // exact + CpuFeatures_StringView_CopyString(str("abc"), buf, sizeof(buf)); + EXPECT_STREQ(buf, "abc"); + + // More + CpuFeatures_StringView_CopyString(str("abcd"), buf, sizeof(buf)); + EXPECT_STREQ(buf, "abc"); +} + +TEST(StringViewTest, CpuFeatures_StringView_HasWord) { + // Find flags at beginning, middle and end. + EXPECT_TRUE( + CpuFeatures_StringView_HasWord(str("first middle last"), "first", ' ')); + EXPECT_TRUE( + CpuFeatures_StringView_HasWord(str("first middle last"), "middle", ' ')); + EXPECT_TRUE( + CpuFeatures_StringView_HasWord(str("first middle last"), "last", ' ')); + // Find flags at beginning, middle and end with a different separator + EXPECT_TRUE( + CpuFeatures_StringView_HasWord(str("first-middle-last"), "first", '-')); + EXPECT_TRUE( + CpuFeatures_StringView_HasWord(str("first-middle-last"), "middle", '-')); + EXPECT_TRUE( + CpuFeatures_StringView_HasWord(str("first-middle-last"), "last", '-')); + // Do not match partial flags + EXPECT_FALSE( + CpuFeatures_StringView_HasWord(str("first middle last"), "irst", ' ')); + EXPECT_FALSE( + CpuFeatures_StringView_HasWord(str("first middle last"), "mid", ' ')); + EXPECT_FALSE( + CpuFeatures_StringView_HasWord(str("first middle last"), "las", ' ')); +} + +TEST(StringViewTest, CpuFeatures_StringView_GetAttributeKeyValue) { + const StringView line = str(" key : first middle last "); + StringView key, value; + EXPECT_TRUE(CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)); + EXPECT_EQ(key, str("key")); + EXPECT_EQ(value, str("first middle last")); +} + +TEST(StringViewTest, FailingGetAttributeKeyValue) { + const StringView line = str("key first middle last"); + StringView key, value; + EXPECT_FALSE(CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)); +} + +} // namespace +} // namespace cpu_features diff --git a/docs/AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md b/docs/AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md new file mode 100644 index 0000000..354eaca --- /dev/null +++ b/docs/AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md @@ -0,0 +1,112 @@ +# List of authors resubmitting their contributions under LGPL-3.0-or-later + +VOLK is migrating from GNU General Public License v3.0 or later (GPL-3.0-or-later) +to GNU Lesser General Public License v3.0 or later (LGPL-3.0-or-later). + +This file is a list of the authors who agreed to resubmit their code +under the LGPL-3.0-or-later license to this repository. +In case the affected code is licensed differently than VOLK's current license (GPL-3.0-or-later), +this gives the VOLK project the right to use the current contributions under both LGPL-3.0-or-later and that other license. +Future contributions by these authors are, however, +licensed under LGPL-3.0-or-later, unless explicitly stated otherwise. + +## Current status + +We'd like to thank everyone who helped to re-license VOLK under LGPL! +Being technical: There are 3 people left (out of 74) who we haven't been able to get in contact with (at all), for a total of 4 (out of 1092) commits, 13 (of 282822) additions, and 7 (of 170421) deletions. We have reviewed these commits and all are simple changes (e.g., 1 line change) and most are no longer relevant (e.g., to a file that no longer exists). VOLK maintainers are in agreement that the combination -- small numbers of changes per committer, simple changes per commit, commits no longer relevant -- means that we can proceed with relicensing without the approval of the folks. We will try reaching out periodically to these folks, but we believe it unlikely we will get a reply. + +## How to add myself to the table of authors +There are several options. Choose the one you prefer. + +1. Open a PR and add yourself to the table. +2. Add a Github comment that clearly states your wish to be added to the table. +3. Write an email to the maintainers that clearly states your wish to be added to the table. + +### Information for option 2 and 3 + +If you go with option 2 or 3, the maintainers add your statement to the corresponding git commit message for documentation purposes. + +Suggested wording + +> I, {contributor name}, hereby resubmit all my contributions to the VOLK project and repository under the terms of the LGPLv3+. My GitHub handle is {github handle}, my email addresses used for contributions are: {email address}, ... . + + +## Table of authors + +Together with the date of agreement, these authors are: + +| Date | Author (as used in commits) | GitHub handle | Email address(es) | +|------------|-----------------------------|-----------------|---------------------------------------------------------------------| +| 2021-07-01 | Marcus Müller | marcusmueller | marcus@hostalia.de, mmueller@gnuradio.org, mueller@kit.edu | +| 2021-08-01 | Johannes Demel | jdemel | ufcsy@student.kit.edu, demel@uni-bremen.de, demel@ant.uni-bremen.de | +| 2021-08-25 | Martin Braun | mbr0wn | martin@gnuradio.org, martin.braun@ettus.com | +| 2021-08-27 | Carles Fernandez | carlesfernandez | carles.fernandez@cttc.es, carles.fernandez@gmail.com, carlesfernandez@gmail.com | +| 2021-08-27 | Magnus Lundmark | Ka-zam | magnus@skysense.io | +| 2021-09-04 | Michael Dickens | michaelld | mlk@alum.mit.edu, michael.dickens@ettus.com | +| 2021-09-05 | Andrej Rode | noc0lour | mail@andrejro.de | +| 2021-09-06 | rear1019 | rear1019 | rear1019@posteo.de | +| 2021-09-08 | Federico 'Larroca' La Rocca | git-artes | flarroca@fing.edu.uy | +| 2021-09-08 | Bernhard M. Wiedemann | bmwiedemann | bwiedemann@suse.de | +| 2021-09-08 | A. Maitland Bottoms | maitbot | bottoms@debian.org | +| 2021-09-08 | Paul Cercueil | pcercuei | paul.cercueil@analog.com | +| 2021-09-08 | Jeison Cardoso | jsonzilla, 0unit| cardoso.jeison@gmail.com | +| 2021-09-08 | Brennan Ashton | btashton | bashton@brennanashton.com | +| 2021-09-08 | Ryan Volz | ryanvolz | ryan.volz@gmail.com, rvolz@mit.edu | +| 2021-09-08 | Douglas Anderson | djanderson | douglas.j.anderson@gmail.com, djanderson@users.noreply.github.com | +| 2021-09-08 | Nicholas McCarthy | namccart | namccart@gmail.com | +| 2021-09-09 | Jaroslav Å karvada | yarda | jskarvad@redhat.com | +| 2021-09-09 | Vasil Velichkov | velichkov | vvvelichkov@gmail.com | +| 2021-09-09 | Takehiro Sekine | bstalk | takehiro.sekine@ps23.jp | +| 2021-09-10 | Vanya Sergeev | vsergeev | vsergeev@gmail.com | +| 2021-09-10 | Ben Hilburn | bhilburn | bhilburn@gnuradio.org, bhilburn@gmail.com, ben@ettus.com, ben@hilburn.dev | +| 2021-09-09 | Philip Balister | balister | philip@balister.org, root@usrp-e1xx.(none) | +| 2021-09-12 | Andrey Rodionov | dernasherbrezon | rodionovamp@mail.ru | +| 2021-09-13 | Clayton Smith | argilo | argilo@gmail.com | +| 2021-09-14 | Martin Kaesberger | mkaesberger | git@skipfish.de | +| 2021-09-08 | Karl Semich | xloem | 0xloem@gmail.com | +| 2021-09-23 | Mike Piscopo | ghostop14 | ghostop14@gmail.com | +| 2021-09-13 | Amr Bekhit | amrbekhit | amr@helmpcb.com, amrbekhit@gmail.com | +| 2021-09-19 | Eric Blossom | eblossom | eb@comsec.com | +| 2021-09-20 | Douglas Geiger | douggeiger | doug.geiger@bioradiation.net,douglas.geiger@nrl.navy.mil | +| 2021-09-21 | Zlika | Zlika | zlika_ese@hotmail.com | +| 2021-09-24 | Ron Economos | drmpeg | w6rz@comcast.net | +| 2021-09-26 | Marc Lichtman | 777arc | marcll@vt.edu | +| 2021-09-28 | Stefan Wunsch | stwunsch | stefan.wunsch@student.kit.edu | +| 2021-10-02 | Albin Stigö | ast | albin.stigo@gmail.com | +| 2021-10-09 | Florian Ritterhoff | fritterhoff | ritterho@hm.edu | +| 2021-10-06 | Sylvain Munaut | smunaut | tnt@246tNt.com, 246tnt@gmail.com | +| 2021-10-10 | Dan Robertson | dlrobertson | dan@dlrobertson.com | +| 2021-10-10 | Steve Markgraf | steve-m | steve@steve-m.de | +| 2021-10-10 | Gwenhael Goavec-Merou | trabucayre | gwenhael.goavec-merou@trabucayre.com | +| 2021-10-10 | Doron Behar | doronbehar | doron.behar@gmail.com | +| 2021-10-10 | Brandon Enochs | brandonenochs | brandon.enochs@nrl.navy.mil | +| 2021-10-10 | Thomas Rondeau | trondeau | tom@trondeau.com, trondeau@vt.edu | +| 2021-10-11 | Tim O'Shea | oshtim | tim.oshea753@gmail.com | +| 2021-10-13 | Alexey Slokva | Alesha72003 | Alesha72003@ya.ru | +| 2021-10-17 | Josh Blum | guruofquality | josh@joshknows.com | +| 2021-10-14 | Nicholas Corgan | ncorgan | n.corgan@gmail.com, nick.corgan@ettus.com | +| 2021-10-24 | Andy Walls | awalls-cx18 | andy@silverblocksystems.net, awalls.cx18@gmail.com, awalls@md.metrocast.net | +| 2021-10-26 | Ettus Research | N/A | nick.corgan@ettus.com, nick@ettus.com, ben.hilburn@ettus.com, michael.dickens@ettus.com | +| 2021-10-27 | Pascal Giard | evilynux | evilynux@gmail.com, pascal.giard@lacime.etsmtl.ca, pascal.giard@etsmtl.ca, pascal.giard@mail.mcgill.ca | +| 2021-10-27 | The Aerospace Corporation | N/A | oss@aero.org, damian.miralles@aero.org, jessica.iwamoto@aero.org, kyle.a.logue@aero.org | +| 2021-10-29 | Christoph Mayer | hcab14 | hcab14@gmail.com, Christoph.Mayer@cern.ch | +| 2021-10-29 | Valerii Zapodovnikov | ValZapod | val.zapod.vz@gmail.com | +| 2021-10-29 | Stefan Oltmanns | Stefan-Olt | stefan-oltmanns@gmx.net | +| 2021-10-29 | Mathieu Rene | mrene | mrene@avgs.ca | +| 2021-10-29 | Steven Behnke | sbehnke | steven_behnke@me.com | +| 2021-10-29 | Adam Thompson | awthomp | adamt@nvidia.com | +| 2021-10-29 | Nick Foster | bistromath | bistromath@gmail.com, nick@nerdnetworks.org | +| 2021-10-29 | Roy Thompson | roythompson | rthompso@gmail.com | +| 2021-10-29 | luzpaz | luzpaz | luzpaz@users.noreply.github.com | +| 2021-10-29 | Damian Miralles | dmiralles2009 | dmiralles2009@gmail.com, damian.miralles@colorado.edu | +| 2021-11-06 | Julien Olivain | jolivain | julien.olivain@lsv.ens-cachan.fr | +| 2021-11-07 | Jam M. Hernandez Quiceno | JamMarHer | jamarck96@gmail.com, jam_quiceno@partech.com | +| 2021-11-19 | Abhishek Bhowmick | abhowmick22 | abhowmick22@gmail.com | +| 2021-11-29 | Aang23 (Alan) | Aang23 | qwerty15@gmx.fr, aang23@altillimity.com | +| 2022-02-06 | Nathan West | n-west | nwest@deepsig.io, nate.ewest@gmail.com, nathan.west@gnuradio.org, nathan.west@nrl.navy.mil, nathan.west@okstate.edu, nathan@pepper | +| 2022-04-01 | Albert Holguin | hades- | aholguin_77@yahoo.com, codename.hash@gmail.com | +| 2022-04-14 | Elliot Briggs | N/A | elliot.briggs@gmail.com | +| 2022-05-31 | Geoffrey Nieboer | gnieboer | gnieboer@gcndevelopment.com, gnieboer@corpcomm.net | +| 2022-06-28 | Moritz Fischer | N/A | gnuradio@pure-entropy.org | +| 2022-06-29 | Johnathan Corgan | jmcorgan | jcorgan@corganenterprises.com, johnathan@corganlabs.com | +| | | | | diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 100644 index 0000000..7e6a987 --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1,656 @@ +# Changelog +All notable changes to VOLK will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html), starting with version 2.0.0. + + +## [2.0.0] - 2019-08-06 + +This is the first version to use semantic versioning. It starts the logging of changes. + + +## [2.1.0] - 2019-12-22 + +Hi everyone, + +we would like to announce that Michael Dickens and Johannes Demel are the new VOLK maintainers. We want to review and merge PRs in a timely manner as well as commenting on issues in order to resolve them. + +We want to thank all contributors. This release wouldn't have been possible without them. + +We're curious about VOLK users. Especially we'd like to learn about VOLK users who use VOLK outside GNU Radio. + +If you have ideas for VOLK enhancements, let us know. Start with an issue to discuss your idea. We'll be happy to see new features get merged into VOLK. + +### Highlights + +VOLK v2.1.0 is a collection of really cool changes. We'd like to highlight some of them. + +- The AVX FMA rotator bug is fixed +- VOLK offers `volk::vector<>` for C++ to follow RAII +- Move towards modern dependencies + - CMake 3.8 + - Prefer Python3 + - We will drop Python2 support in a future release! + - Use C++17 `std::filesystem` + - This enables VOLK to be built without Boost if available! +- more stable CI +- lots of bugfixes +- more optimized kernels, especially more NEON versions + +### Contributors + +* Albin Stigo +* Amr Bekhit +* Andrej Rode +* Carles Fernandez +* Christoph Mayer +* Clayton Smith +* Damian Miralles +* Johannes Demel +* Marcus Müller +* Michael Dickens +* Philip Balister +* Takehiro Sekine +* Vasil Velichkov +* luz.paz + + +### Changes + +* Usage + - Update README to reflect how to build on Raspberry Pi and the importance of running volk_profile + +* Toolchain + - Add toolchain file for Raspberry Pi 3 + - Update Raspberry 4 toolchain file + +* Kernels + - Add neonv7 to volk_16ic_magnitude_16i + - Add neonv7 to volk_32fc_index_max_32u + - Add neonv7 to volk_32fc_s32f_power_spectrum_32f + - Add NEONv8 to volk_32f_64f_add_64f + - Add Neonv8 to volk_32fc_deinterleave_64f_x2 + - Add volk_32fc_x2_s32fc_multiply_conjugate_add_32fc + - Add NEONv8 to volk_32fc_convert_16ic + +* CI + - Fix AVX FMA rotator + - appveyor: Enable testing on windows + - Fixes for flaky kernels for more reliable CI + - volk_32f_log2_32f + - volk_32f_x3_sum_of_poly_32f + - volk_32f_index_max_{16,32}u + - volk_32f_8u_polarbutterflypuppet_32f + - volk_8u_conv_k7_r2puppet_8u + - volk_32fc_convert_16ic + - volk_32fc_s32f_magnitude_16i + - volk_32f_s32f_convert_{8,16,32}i + - volk_16ic_magnitude_16i + - volk_32f_64f_add_64f + - Use Intel SDE to test all kernels + - TravisCI + - Add native tests on arm64 + - Add native tests on s390x and ppc64le (allow failure) + +* Build + - Build Volk without Boost if C++17 std::filesystem or std::experimental::filesystem is available + - Update to more modern CMake + - Prevent CMake to choose previously installed VOLK headers + - CMake + - bump minimum version to 3.8 + - Use sha256 instead of md5 for unique target name hash + - Python: Prefer Python3 over Python2 if available + +* C++ + - VOLK C++ allocator and C++11 std::vector type alias added +\n +## [2.2.0] - 2020-02-16 + +Hi everyone, + +we have a new VOLK release v2.2.0! + +We want to thank all contributors. This release wouldn't have been possible without them. + +We're curious about VOLK users. Especially we'd like to learn about VOLK users who use VOLK outside GNU Radio. + +If you have ideas for VOLK enhancements, let us know. Start with an issue to discuss your idea. We'll be happy to see new features get merged into VOLK. + +The v2.1.0 release was rather large because we had a lot of backlog. We aim for more incremental releases in order to get new features out there. + +### Highlights + +VOLK v2.2.0 updates our build tools and adds support functionality to make it easier to use VOLK in your projects. + +* Dropped Python 2 build support + - Removed Python six module dependency +* Use C11 aligned_alloc whenever possible + - MacOS `posix_memalign` fall-back + - MSVC `_aligned_malloc` fall-back +* Add VOLK version in `volk_version.h` (included in `volk.h`) +* Improved CMake code +* Improved code with lots of refactoring and performance tweaks + +### Contributors + +* Carles Fernandez +* Gwenhael Goavec-Merou +* Albin Stigo +* Johannes Demel +* Michael Dickens +* Valerii Zapodovnikov +* Vasil Velichkov +* ghostop14 +* rear1019 + +### Changes + +* CMake + - Fix detection of AVX and NEON + - Fix for macOS + - lib/CMakeLists: use __asm__ instead of asm for ARM tests + - lib/CMakeLists: fix detection when compiler support NEON but nor neonv7 nor neonv8 + - lib/CMakeLists.txt: use __VOLK_ASM instead of __asm__ + - lib/CMakeLists.txt: let VOLK choose preferred neon version when both are supported + - lib/CMakeLists.txt: simplify neon test support. Unset neon version if not supported + - For attribute, change from clang to "clang but not MSC" +* Readme + - logo: Add logo at top of README.md +* Build dependencies + - python: Drop Python2 support + - python: Reduce six usage + - python: Move to Python3 syntax and modules + - six: Remove build dependency on python six +* Allocation + - alloc: Use C11 aligned_alloc + - alloc: Implement fall backs for C11 aligned_alloc + - alloc: Fix for incomplete MSVC standard compliance + - alloc: update to reflect alloc changes +* Library usage + - Fixup VolkConfigVersion + - add volk_version.h +* Refactoring + - qa_utils.cc: fix always false expression + - volk_prefs.c: check null realloc and use temporary pointer + - volk_profile.cc: double assignment and return 0 + - volk_32f_x2_pow_32f.h: do not need to _mm256_setzero_ps() + - volk_8u_conv_k7_r2puppet_8u.h: d_polys[i] is positive + - kernels: change one iteration for's to if's + - kernels: get rid of some assignments + - qa_utils.cc: actually throw something + - qa_utils.cc: fix always true code + - rotator: Refactor AVX kernels + - rotator: Remove unnecessary variable + - kernel: Refactor square_dist_scalar_mult + - square_dist_scalar_mult: Speed-Up AVX, Add unaligned + - square_dist_scalar_mult: refactor AVX2 kernel + - kernel: create AVX2 meta intrinsics +* CI + - appveyor: Test with python 3.4 and 3.8 + - appveyor: Add job names + - appveyor: Make ctest more verbose +* Performance + - Improve performance of generic kernels with complex multiply + - square_dist_scalar_mult: Add SSE version + - Adds NEON versions of cos, sin and tan + +## [2.2.1] - 2020-02-24 + +Hi everyone, + +with VOLK 2.2.0, we introduced another AVX rotator bug which is fixed with this release. +In the process 2 more bugs were identified and fixed. Further, we saw some documentation improvements. + + +### Contributors + +* Clayton Smith +* Michael Dickens + + +### Changes + + +* Fix loop bound in AVX rotator +* Fix out-of-bounds read in AVX2 square dist kernel +* Fix length checks in AVX2 index max kernels +* includes: rearrange attributes to simplify macros Whitespace +* kernels: fix usage in header comments +\n +## [2.3.0] - 2020-05-09 + +Hi everyone! + +VOLK 2.3 is out! We want to thank all contributors. This release wouldn't have been possible without them. We saw lots of great improvements. + +On GNU Radio 'master' VOLK was finally removed as a submodule. + +Currently we see ongoing discussions on how to improve CPU feature detection because VOLK is not as reliable as we'd like it to be in that department. We'd like to benefit from other open source projects and don't want to reinvent the wheel. Thus, one approach would be to include `cpu_features` as a submodule. + +### Highlights + +* Better reproducible builds +* CMake improvements + - ORC is removed from the public interface where it was never supposed to be. + - CMake fixes for better usability +* Updated and new CI chain + - TravisCI moved to new distro and does more tests for newer GCC/Clang + - Github Actions + - Add Action to check proper code formatting. + - Add CI that also runs on MacOS with XCode. +* Enforce C/C++ coding style via clang-format +* Kernel fixes + - Add puppet for `power_spectral_density` kernel + - Treat the `mod_range` puppet as a puppet for correct use with `volk_profile` + - Fix `index_max` kernels + - Fix `rotator`. We hope that we finally found the root cause of the issue. +* Kernel optimizations + - Updated log10 calcs to use faster log2 approach + - Optimize `complexmultiplyconjugate` +* New kernels + - accurate exp kernel is finally merged after years + - Add 32f_s32f_add_32f kernel to perform vector + scalar float operation + +### Contributors + +* Bernhard M. Wiedemann +* Clayton Smith +* Johannes Demel +* Michael Dickens +* Tom Rondeau +* Vasil Velichkov +* ghostop14 + +### Changes + +* Reproducible builds + - Drop compile-time CPU detection + - Drop another instance of compile-time CPU detection +* CI updates + - ci: Add Github CI Action + - ci: Remove Ubuntu 16.04 GCC5 test on TravisCI + - TravisCI: Update CI to bionic distro + - TravisCI: Add GCC 8 test + - TravisCI: Structure CI file + - TravisCI: Clean-up .travis.yml +* Enforce C/C++ coding style + - clang-format: Apply clang-format + - clang-format: Update PR with GitHub Action + - clang-format: Rebase onto current master +* Fix compiler warnings + - shadow: rebase kernel fixes + - shadow: rebase volk_profile fix +* CMake + - cmake: Remove the ORC from the VOLK public link interface + - versioning: Remove .Maint from libvolk version + - Fix endif macro name in comment +* Kernels + - volk: accurate exp kernel + - exp: Rename SSE4.1 to SSE2 kernel + - Add 32f_s32f_add_32f kernel + - This kernel adds in vector + scalar functionality + - Fix the broken index max kernels + - Treat the mod_range puppet as such + - Add puppet for power spectral density kernel + - Updated log10 calcs to use faster log2 approach + - fix: Use unaligned load + - divide: Optimize complexmultiplyconjugate + + +## [2.4.0] - 2020-11-22 + +Hi everyone! + +We have another VOLK release! We're happy to announce VOLK v2.4.0! We want to thank all contributors. This release wouldn't have been possible without them. + +We introduce `cpu_features` as a private submodule in this release because we use it to detect CPU features during runtime now. This should enable more reliable feature detection. Further, it takes away the burden to implement platform specific code. As a result, optimized VOLK kernels build for MacOS and Windows with AppleClang/MSVC out-of-the-box now. + + +### Highlights + +* Documentation + - Update README to be more verbose and to improve usefulness. + +* Compilers + - MSVC: Handle compiler flags and thus architecture specific kernels correctly. This enables optimized kernels with MSVC builds. + - AppleClang: Treat AppleClang as Clang. + - Paired with the `cpu_features` introduction, this enables us to use architecture specific kernels on a broader set of platforms. +* CI + - Update to newest version of the Intel SDE + - Test the rotator kernel with a realistic scalar + - Introduce more test cases with newer GCC and newer Clang versions. +* CMake + - Enable to not install `volk_modtool`. + - Remove "find_package_handle_standard_args" warning. +* cpu_features + - Use `cpu_features` v0.6.0 as a private submodule to detect available CPU features. + - Fix incorrect feature detection for newer AVX versions. + - Circumvent platform specific feature detection. + - Enable more architecture specific kernels on more platforms. +* Kernels + - Disable slow and broken SSE4.1 kernel in `volk_32fc_x2_dot_prod_32fc` + - Adjust min/max for `32f_s32f_convert_8i` kernel + - Use `INT8_*` instead of `CHAR_*` + + +### Contributors + +* Adam Thompson +* Andrej Rode +* Christoph Mayer +* Clayton Smith +* Doron Behar +* Johannes Demel , +* Martin Kaesberger +* Michael Dickens +* Ron Economos + + +### Changes + +* Documentation + - Update README to include ldconfig upon volk build and install completion + - Update README based on review + - readme: Fix wording + - docs: Fix conversion inaccuracy + +* MSVC + - archs: MSVC 2013 and greater don't have a SSE flag + +* CI + - update to newest version of the Intel SDE + - Test the rotator kernel with a realistic scalar + +* CMake + - build: Enable to not install volk_modtool + - cmake: Remove "find_package_handle_standard_args" warning. + - cmake: Ensure that cpu_features is built as a static library. + +* cpu_features + - readme: Add section on supported platforms + - readme: Make supported compiler section more specific + - travis: Add GCC 9 test on focal + - travis: Add tests for clang 8, 9, 10 + - travis: Fix incorrect CXX compiler assignment + - cpu_features: Remove unused feature checks + - ci: Update TravisCI for cpu_features + - cpu_features: Fix MSVC build + - pic: Fix BUILD_PIC issue + - ci: Update CI system configuration + - cpu_features: Bump submodule pointer to v0.6.0 + - docs: Add hints how to handle required submodules + - cpu_features: Switch to cpu_features + - ci: Update CI system for cpu_features + - cpu_features: Force PIC build flag + - appveyor: Add recursive clone command + - cpu_features: Remove xgetbv checks + - pic: Cache and force BUILD_PIC + - ci: Remove explicit BUILD_PIC from cmake args + - ci: Remove BUILD_PIC from CI cmake args + - cpu_features: Remove commented code + - cpu_features: Assume AppleClang == Clang + - cpu_features: Remove obsolete code in archs.xml + - fix for ARM cross-compiling CI + - ARM CI: remove unneeded environment variables + +* Housekeeping + - structure: Move release scripts to scripts folder + +* Kernels + - emit an emms instruction after using the mmx extension + - volk_32fc_x2_dot_prod_32fc: disable slow & broken SSE4.1 kernel + - fix: Adjust min/max for 32f_s32f_convert_8i kernel + - fix: Use INT8_* instead of CHAR_* + + +## [2.4.1] - 2020-12-17 + +Hi everyone! + +We have a new VOLK bugfix release! We are happy to announce VOLK v2.4.1! We want to thank all contributors. This release wouldn't have been possible without them. + +Our v2.4.0 release introduced quite a lot of changes under the hood. With this bugfix release, we want to make sure that everything works as expected again. + + +### Contributors + +* A. Maitland Bottoms +* Johannes Demel +* Michael Dickens +* Philip Balister +* Ron Economos +* Ryan Volz + + +### Changes + +* Build + - cpu_features CMake option + - Add cpu_features to static library build. + - Use static liborc-0.4 library for static library build. + - cmake: Detect if cpu_features submodule is present. + +* Install + - Check for lib64 versus lib and set LIB_SUFFIX accordingly. + +* CI + - Add CI test for static library build. + +* Releases + - project: Include git submodules (i.e. cpu_features) in release tarball. + - scripts: Add GPG signature to release script + +* Other + - readme: Update TravisCI status badge + + +## [2.5.0] - 2021-06-05 + +Hi everyone! + +We have a new VOLK release! We are happy to announce VOLK v2.5.0! We want to thank all contributors. This release wouldn't have been possible without them. + +This release adds new kernel implementations and fixes. Some of these were longstanding PRs that could only be merged recently thanks to our switch from CLA to DCO. + +### Announcements + +I would like to point out one upcoming change. After this release, we will rename our development branch to `main` as discussed in [issue #461](https://github.com/gnuradio/volk/issues/461). + + +I'd like to point the community to this [VOLK relicensing GREP](https://github.com/gnuradio/greps/pull/33). +This is an ongoing effort to relicense VOLK under LGPLv3. +We're looking for people and organizations that are interested in leading this effort. + +### Contributors + +* Aang23 +* Carles Fernandez +* Florian Ritterhoff +* Jam M. Hernandez Quiceno , +* Jaroslav Å karvada +* Johannes Demel +* Magnus Lundmark +* Michael Dickens +* Steven Behnke +* alesha72003 +* dernasherbrezon +* rear1019 + + +### Changes + +* Kernels + - volk_32f_stddev_and_mean_32f_x2: implemented Young and Cramer's algorithm + - volk_32fc_accumulator_s32fc: Add new kernel + - volk_16ic_x2_dot_prod_16ic_u_avx2: Fix Typo, was `_axv2`. + - Remove _mm256_zeroupper() calls + - Enforce consistent function prototypes + - 32fc_index_max: Improve speed of AVX2 version + - conv_k7_r2: Disable broken AVX2 code + - improve volk_8i_s32f_convert_32f for ARM NEON + - Calculate cos in AVX512F + - Calculate sin using AVX512F + + +* Compilers + - MSVC + - Fix MSVC builds + - GCC + - Fix segmentation fault when using GCC 8 + - MinGW + - add support and test for MinGW/MSYS2 + +* The README has received several improvements + +* Build + - Fix python version detection + - cmake: Check that 'distutils' is available + - c11: Remove pre-C11 preprocessor instructions + +* CI + - Add more CI to GitHub Actions + - Remove redundant tests from TravisCI + - Add non-x86 GitHub Actions + - Update compiler names in CI + - Disable fail-fast CI + - Add more debug output to tests + +* Contributing + - contributing: Add CONTRIBUTING.md and DCO.txt + + +## [2.5.1] - 2022-02-12 + +Hi everyone! + +We have a new VOLK release! We are happy to announce VOLK v2.5.1! We want to thank all contributors. This release wouldn't have been possible without them. + +The list of contributors is pretty long this time due to a lot of support to relicense VOLK under LGPL. Currently, we are "almost there" but need a few more approvals, please support us. We thank everyone for their support in this effort. + +We use `cpu_features` for a while now. This maintainance release should make it easier for package maintainers, FreeBSD users, and M1 users to use VOLK. Package maintainers should be able to use an external `cpu_features` module. For everyone else, `cpu_features` received support for M1 and FreeBSD. + +You can find [VOLK on Zenodo DOI: 10.5281/zenodo.3360942](https://doi.org/10.5281/zenodo.3360942). +We started to actively support Zenodo now via a `.zenodo.json` file. This might come in handy for people who use VOLK in publications. As a contributor, if you want more information about yourself added to VOLK, feel free to add your ORCiD and affiliation. + +In the past, we relied on Boost for several tasks in `volk_profile`. For years, we minimized Boost usage to `boost::filesystem`. We mostly switched to C++17 `std::filesystem` years ago. The last distribution in our CI system that required Boost to build VOLK, was Ubuntu 14.04. Thus, now is the time to remove the Boost dependency completely and rely on C++17 features. + +Some VOLK kernels are untested for years. We decided to deprecate these kernels but assume that nobody uses them anyways. If your compiler spits out a warning that you use a deprecated kernel, get in touch. Besides, we received fixes for various kernels. Especially FEC kernels are notoriously difficult to debug because issues often pop up as performance regressions. + +Finally, we saw a lot of housekeeping in different areas. Scripts to support us in our LGPL relicensing effort, updated docs, and updated our code of conduct. We could remove some double entries in our QA system and fixed a `volk_malloc` bug that ASAN reported. +Finally, we switched to the Python `sysconfig` module in our build system to ensure Python 3.12+ does not break our builds. + + + +### Contributors + +* A. Maitland Bottoms +* Aang23 +* AlexandreRouma +* Andrej Rode +* Ben Hilburn +* Bernhard M. Wiedemann +* Brennan Ashton +* Carles Fernandez +* Clayton Smith +* Doug +* Douglas Anderson +* Florian Ritterhoff +* Jaroslav Å karvada +* Johannes Demel +* Josh Blum +* Kyle A Logue +* Luigi Cruz +* Magnus Lundmark +* Marc L +* Marcus Müller +* Martin Kaesberger +* Michael Dickens +* Nathan West +* Paul Cercueil +* Philip Balister +* Ron Economos +* Ryan Volz +* Sylvain Munaut +* Takehiro Sekine +* Vanya Sergeev +* Vasil Velichkov +* Zlika +* namccart +* dernasherbrezon +* rear1019 + + +### Changes + +* Kernels + - Fixup underperforming GENERIC kernel for volk_8u_x4_conv_k7_r2_8u + - volk_32fc_x2_conjugate_dot_prod_32fc: New generic implementation + - Add volk_32f(c)_index_min_16/32u + - Fix volk_32fc_index_min_32u_neon + - Fix volk_32fc_index_min_32u_neon + +* Misc + - Fix volk_malloc alignment bug + - qa: Remove repeating tests + - python: Switch to sysconfig module + - deprecate: Add attribute deprecated + - deprecate: Exclude warnings on Windows + - docs: Update docs + - Add the list of contributors agreeing to LGPL licensing + - Add a script to count the lines that are pending resubmission + - Testing: Add test for LGPL licensing + - Update CODE_OF_CONDUCT file + +* Boost + - boost: Remove boost dependency + - c++: Require C++17 for std::filesystem + +* cpu_features + cpu_features: Update submodule pointer + cpu_features: Make cpu_features submodule optional + +* Zenodo + zenodo: Add metadata file + zenodo: Re-organize .zenodo.json + +## [2.5.2] - 2022-09-04 + +Hi everyone! + +We have a new VOLK release! We are happy to announce VOLK v2.5.2! We want to thank all contributors. This release wouldn't have been possible without them. + +We are happy to announce that our re-licensing effort is complete. This has been a long and challenging journey. Being technical: There are 3 people left (out of 74) who we haven't been able to get in contact with (at all), for a total of 4 (out of 1092) commits, 13 (of 282822) additions, and 7 (of 170421) deletions. We have reviewed these commits and all are simple changes (e.g., 1 line change) and most are no longer relevant (e.g., to a file that no longer exists). VOLK maintainers are in agreement that the combination -- small numbers of changes per committer, simple changes per commit, commits no longer relevant -- means that we can proceed with re-licensing without the approval of the folks. We will try reaching out periodically to these folks, but we believe it unlikely we will get a reply. + +This maintainance release is intended to be the last VOLK 2.x release. After we completed our re-licensing effort, we want to make a VOLK 3.0 release soon. VOLK 3.0 will be fully compatible with VOLK 2.x on a technical level. However, VOLK 3+ will be released under LGPL. We are convinced a license change justifies a major release. + +### Contributors + +* Aang23 +* Clayton Smith +* Johannes Demel , +* Michael Dickens +* Michael Roe + +### Changes + +* Android + - Add Android CI + - Fix armeabi-v7a on Android +* CI + - Update all test jobs to more recent actions +* volk_8u_x4_conv_k7_r2_8u + - Add NEON implementation `neonspiral` via `sse2neon.h` +* Fixes + - Fix out-of-bounds reads + - Fix broken neon kernels + - Fix float to int conversion +* CMake + - Suppress superfluous warning + - Fix Python install path calculation and documentation +* cpu_features + - Update submodule pointer +* VOLK 3.0 release preparations + - Use SPDX license identifiers everywhere + - Re-arrange files in top-level folder + - Update Doxygen and all Doxygen related tasks into `docs` diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt new file mode 100644 index 0000000..90eb1d7 --- /dev/null +++ b/docs/CMakeLists.txt @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022 Johannes Demel. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +find_package(Doxygen) +if(DOXYGEN_FOUND) + +message(STATUS "Doxygen found. Building docs ...") + +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in + ${CMAKE_BINARY_DIR}/Doxyfile +@ONLY) + +add_custom_target(volk_doc + ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Generating documentation with Doxygen" VERBATIM +) + +endif(DOXYGEN_FOUND) diff --git a/docs/CODE_OF_CONDUCT.md b/docs/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..2d20b6b --- /dev/null +++ b/docs/CODE_OF_CONDUCT.md @@ -0,0 +1,127 @@ +# VOLK Code of Conduct + +This Code of Conduct is adapted from the GNU Radio Code of Conduct, +available in the [gr-governance repository][GRCoC], dated 2021-07-12. + +## Our Pledge + +We as members, contributors, and leaders strive to make participation +in our community a harassment-free experience for everyone. Qualities +and characteristics we note specifically include regardless of age, +body size, visible or invisible disability, visible or invisible +enhancement, ethnicity, sex characteristics, gender identity, gender +expression, sexual identity, sexual orientation, level of experience, +education, socio-economic status, nationality, personal appearance, +race, caste, color, religion, and military status -- but lists like +these cannot be complete and hence we note "**for everyone**" in order +to be as inclusive as possible. + +We pledge to act and interact in ways that contribute to an open, +welcoming, diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for +our community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Project maintainers are responsible for clarifying and enforcing our +standards of acceptable behavior and will take appropriate and fair +corrective action in response to any behavior that they deem +inappropriate, threatening, offensive, or harmful. + +Project maintainers have the right and responsibility to remove, edit, +or reject comments, commits, code, wiki edits, issues, and other +contributions that are not aligned to this Code of Conduct, and will +communicate reasons for moderation decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also +applies when an individual is officially representing the community in +public spaces. Examples of representing our community include using +an official e-mail address, posting via an official social media +account, or acting as an appointed representative at an online or +offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior +may be reported by contacting the project team at +**conduct@gnuradio.org** . All complaints will be reviewed and +investigated promptly and fairly. + +All complain handlers are obligated to respect the privacy and +security of the reporter of any incident. + +## Enforcement Guidelines + +Project maintainers will follow these Community Impact Guidelines in +determining the consequences for any action they deem in violation of +this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior +deemed unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from Project maintainers, +providing clarity around the nature of the violation and an +explanation of why the behavior was inappropriate. A public apology +may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued +behavior. No interaction with the people involved, including +unsolicited interaction with those enforcing the Code of Conduct, for +a specified period of time. This includes avoiding interactions in +community spaces as well as external channels like social +media. Violating these terms may lead to a temporary or permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, +including sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or +public communication with the community for a specified period of +time. No public or private interaction with the people involved, +including unsolicited interaction with those enforcing the Code of +Conduct, is allowed during this period. Violating these terms may +lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of +community standards, including sustained inappropriate behavior, +harassment of an individual, or aggression toward or disparagement of +classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction +within the community. + +[GRCoC]: https://github.com/gnuradio/gr-governance/blob/main/CODE_OF_CONDUCT.md diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 100644 index 0000000..699b6ec --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1,78 @@ +# Contributing to VOLK + +Welcome! You are reading about how to contribute code to VOLK. First of +all, we are very happy that you're about to contribute, and welcome your +submissions! We hope many more will come. + +In this document, we will explain the main things to consider when submitting +pull requests against VOLK. Reading this first will help a lot with +streamlining the process of getting your code merged. + +There is also a [wiki-based version of this file][wikicontrib], which contains +more detail. VOLK is part of the GNU Radio project and as such, it follows the +same contribution guidelines. This file is an [adopted GNU Radio checklist][gnuradiocontrib]. + +## What about non-code contributions? + +Those are at least as important as code contributions: Emails to the mailing +list, answers on Stack Overflow, Wiki page edits, examples... We very much +appreciate those. However, this document is specifically about contributing +code. + +## DCO Signed? + +Any code contributions going into VOLK will become part of an LGPL-licensed +(former contributions are GPL-licensed), open source repository. It is therefore +imperative that code submissions belong to the authors, and that submitters have +the authority to merge that code into the public VOLK codebase. + +For that purpose, we use the [Developer's Certificate of Origin](DCO.txt). It +is the same document used by other projects. Signing the DCO states that there +are no legal reasons to not merge your code. + +To sign the DCO, suffix your git commits with a "Signed-off-by" line. When +using the command line, you can use `git commit -s` to automatically add this +line. If there were multiple authors of the code, or other types of +stakeholders, make sure that all are listed, each with a separate Signed-off-by +line. + +## Coding Guidelines + +We have codified our coding guidelines in [GNU Radio GREP1][grep1]. Please read them, +and stick to them. For C/C++ code, use clang-format. For Python, PEP8 is your friend +(but again, check the actual coding guidelines). + +## Git commit messages are very important + +We follow standard git commit message guidelines, similar to many other open +source projects. See the [coding guidelines][grep1] for more details. In a +nutshell: +- Keep the lines below 72 characters +- Subject line has the component prepended (e.g., `kernelname:`) +- Avoid empty git commit messages +- The git commit message explains the change, the code only explains the current + state + +## Unit Tests + +VOLK unit tests compare the results of each kernel version to the generic version. +Keep the generic kernel version as simple as possible and verify your optimized +kernels against the generic version. + +## The Buddy Principle: Submit One, Review One + +When you've submitted a pull request, please take the time to review another +one. This helps make sure that there are always a number of reviews at least +equal to the number of pull requests, which means the maintainers don't get +overwhelmed when a lot is being contributed. + +## Standard command line options + +When writing programs that are executable from the command line, +please follow existing examples regarding their command line arguments, and +reuse them. + +[grep1]: https://github.com/gnuradio/greps/blob/master/grep-0001-coding-guidelines.md +[wikicontrib]: https://wiki.gnuradio.org/index.php/Development +[gr-devs]: https://github.com/orgs/gnuradio/teams/gr-devs +[gnuradiocontrib]: https://github.com/gnuradio/gnuradio/blob/master/CONTRIBUTING.md diff --git a/docs/DCO.txt b/docs/DCO.txt new file mode 100644 index 0000000..8201f99 --- /dev/null +++ b/docs/DCO.txt @@ -0,0 +1,37 @@ +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +1 Letterman Drive +Suite D4700 +San Francisco, CA, 94129 + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in new file mode 100644 index 0000000..70913f5 --- /dev/null +++ b/docs/Doxyfile.in @@ -0,0 +1,2364 @@ +# Doxyfile 1.8.6 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "Vector Optimized Library of Kernels" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = @VERSION@ + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "Architecture-tuned implementations of math kernels" + +# With the PROJECT_LOGO tag one can specify an logo or icon that is included in +# the documentation. The maximum height of the logo should not exceed 55 pixels +# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo +# to the output directory. + +PROJECT_LOGO = @CMAKE_SOURCE_DIR@/docs/volk_logo_small.png + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +# TODO: configure this to be a special docs directory. nw tried, but running +# make doc won' create the directory, but with doxygen it will. why? + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a +# new page for each member. If set to NO, the documentation of a member will be +# part of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. +# +# Note For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by by putting a % sign in front of the word +# or globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = YES + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO these classes will be included in the various overviews. This option has +# no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = YES + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the +# todo list. This list is created by putting \todo commands in the +# documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the +# test list. This list is created by putting \test commands in the +# documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES the list +# will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = @CMAKE_CURRENT_SOURCE_DIR@/DoxygenLayout.xml + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. Do not use file names with spaces, bibtex cannot handle them. See +# also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO doxygen will only warn about wrong or incomplete parameter +# documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. +# Note: If this tag is empty the current directory is searched. + +INPUT = @CMAKE_SOURCE_DIR@ + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank the +# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, +# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, +# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, +# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, +# *.qsf, *.as and *.js. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.f90 \ + *.f \ + *.for \ + *.tcl \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf \ + *.as \ + *.js + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = @CMAKE_BINARY_DIR@ @CMAKE_SOURCE_DIR@/cpu_features @CMAKE_SOURCE_DIR@/README.md @CMAKE_SOURCE_DIR@/docs/AUTHORS_RESUBMITTING_UNDER_LGPL_LICENSE.md + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# +# +# where is the value of the INPUT_FILTER tag, and is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER ) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES, then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- +# defined cascading style sheet that is included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet file to the output directory. For an example +# see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the stylesheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler ( hhc.exe). If non-empty +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated ( +# YES) or that it should be included in the master .chm file ( NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated ( +# YES) or a normal table of contents ( NO) in the .chm file. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = YES + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = YES + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using prerendered bitmaps. Use this if you do not have LaTeX +# installed or if you want to formulas look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /