From: Ell Date: Wed, 24 Jul 2019 20:21:01 +0000 (+0300) Subject: configure.ac, meson, cpu-accel: add AVX2 detection X-Git-Tag: archive/raspbian/1%0.1.106-3+rpi1^2~15^2~11^2~53 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=385f0b545727262f58d3cfcf5523f69ace0e0166;p=babl.git configure.ac, meson, cpu-accel: add AVX2 detection Detect AVX2 support during configuration and runtime, in preparation for next commit. --- diff --git a/babl/babl-cpuaccel.c b/babl/babl-cpuaccel.c index 534fa89..ef26fa5 100644 --- a/babl/babl-cpuaccel.c +++ b/babl/babl-cpuaccel.c @@ -121,11 +121,15 @@ enum ARCH_X86_INTEL_FEATURE_SSE4_2 = 1 << 20, ARCH_X86_INTEL_FEATURE_AVX = 1 << 28, ARCH_X86_INTEL_FEATURE_F16C = 1 << 29, + + /* extended features */ + ARCH_X86_INTEL_FEATURE_AVX2 = 1 << 5 }; #if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__)) #define cpuid(op,eax,ebx,ecx,edx) \ __asm__ ("movl %%ebx, %%esi\n\t" \ + "xor %%ecx, %%ecx\n\t" \ "cpuid\n\t" \ "xchgl %%ebx,%%esi" \ : "=a" (eax), \ @@ -135,7 +139,8 @@ enum : "0" (op)) #else #define cpuid(op,eax,ebx,ecx,edx) \ - __asm__ ("cpuid" \ + __asm__ ("xor %%ecx, %%ecx\n\t" \ + "cpuid" \ : "=a" (eax), \ "=b" (ebx), \ "=c" (ecx), \ @@ -253,6 +258,16 @@ arch_accel_intel (void) if (ecx & ARCH_X86_INTEL_FEATURE_F16C) caps |= BABL_CPU_ACCEL_X86_F16C; + + cpuid (0, eax, ebx, ecx, edx); + + if (eax >= 7) + { + cpuid (7, eax, ebx, ecx, edx); + + if (ebx & ARCH_X86_INTEL_FEATURE_AVX2) + caps |= BABL_CPU_ACCEL_X86_AVX2; + } #endif /* USE_SSE */ } #endif /* USE_MMX */ diff --git a/babl/babl-cpuaccel.h b/babl/babl-cpuaccel.h index 738bc59..b8a6855 100644 --- a/babl/babl-cpuaccel.h +++ b/babl/babl-cpuaccel.h @@ -35,6 +35,7 @@ typedef enum /* BABL_CPU_ACCEL_X86_SSE4_2 = 0x00200000, */ /* BABL_CPU_ACCEL_X86_AVX = 0x00080000, */ BABL_CPU_ACCEL_X86_F16C = 0x00040000, + BABL_CPU_ACCEL_X86_AVX2 = 0x00020000, /* powerpc accelerations */ BABL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000, diff --git a/configure.ac b/configure.ac index 7f53331..bb29428 
100644 --- a/configure.ac +++ b/configure.ac @@ -353,6 +353,10 @@ AC_ARG_ENABLE(sse4_1, [ --enable-sse4_1 enable SSE4_1 support (default=auto)],, enable_sse4_1=$enable_sse) +AC_ARG_ENABLE(avx2, + [ --enable-avx2 enable AVX2 support (default=auto)],, + enable_avx2=$enable_sse) + AC_ARG_ENABLE(f16c, [ --enable-f16c enable hardware half-float support (default=auto)],, enable_f16c=$enable_sse) @@ -363,6 +367,7 @@ if test "x$enable_mmx" = xyes; then SSE2_EXTRA_CFLAGS= SSE3_EXTRA_CFLAGS= SSE4_1_EXTRA_CFLAGS= + AVX2_EXTRA_CFLAGS= F16C_EXTRA_CFLAGS= AC_MSG_CHECKING(whether we can compile MMX code) @@ -448,6 +453,24 @@ if test "x$enable_mmx" = xyes; then AC_MSG_RESULT(no) AC_MSG_WARN([The assembler does not support the SSE4_1 command set.]) ) + + if test "x$enable_avx2" = xyes; then + BABL_DETECT_CFLAGS(avx2_flag, '-mavx2') + AVX2_EXTRA_CFLAGS="$SSE4_1_EXTRA_CFLAGS $avx2_flag" + + AC_MSG_CHECKING(whether we can compile AVX2 code) + + CFLAGS="$CFLAGS $avx2_flag" + + AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpgatherdd %ymm0,(%rax,%ymm1,4),%ymm2");])], + AC_DEFINE(USE_AVX2, 1, [Define to 1 if AVX2 assembly is available.]) + AC_MSG_RESULT(yes) + , + enable_avx2=no + AC_MSG_RESULT(no) + AC_MSG_WARN([The assembler does not support the AVX2 command set.]) + ) + fi fi fi fi @@ -486,6 +509,7 @@ if test "x$enable_mmx" = xyes; then AC_SUBST(SSE2_EXTRA_CFLAGS) AC_SUBST(SSE3_EXTRA_CFLAGS) AC_SUBST(SSE4_1_EXTRA_CFLAGS) + AC_SUBST(AVX2_EXTRA_CFLAGS) AC_SUBST(F16C_EXTRA_CFLAGS) fi diff --git a/meson.build b/meson.build index b17db52..c72688e 100644 --- a/meson.build +++ b/meson.build @@ -216,6 +216,16 @@ if cc.has_argument('-mmmx') and get_option('enable-mmx') conf.set('USE_SSE4_1', 1, description: 'Define to 1 if sse4.1 assembly is available.') endif + + # avx2 assembly + if cc.has_argument('-mavx2') and get_option('enable-avx2') + if cc.compiles('asm ("vpgatherdd %ymm0,(%rax,%ymm1,4),%ymm2");') + message('avx2 assembly available') + avx2_cflags = '-mavx2' + conf.set('USE_AVX2', 1, 
description: + 'Define to 1 if avx2 assembly is available.') + endif + endif endif endif endif diff --git a/meson_options.txt b/meson_options.txt index ab08ce9..f4a7ced 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -3,6 +3,7 @@ option('enable-sse', type: 'boolean', value: true, description: 'enable SSE s option('enable-sse2', type: 'boolean', value: true, description: 'enable SSE2 support') option('enable-sse3', type: 'boolean', value: true, description: 'enable SSE3 support') option('enable-sse4_1', type: 'boolean', value: true, description: 'enable SSE4.1 support') +option('enable-avx2', type: 'boolean', value: true, description: 'enable AVX2 support') option('enable-f16c', type: 'boolean', value: true, description: 'enable hardware half-float support') option('with-docs', type: 'boolean', value: true, description: 'build website') option('with-lcms', type: 'boolean', value: true, description: 'build with lcms')