From 1c151bd73fb3cd28719addfcafa3f92569fd0baa Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= Date: Sat, 22 Jan 2022 00:02:43 +0100 Subject: [PATCH] babl: sync cpuaccel with GEGL --- babl/babl-cpuaccel.c | 94 ++++++++++++++++++++++++++++++++++++++------ babl/babl-cpuaccel.h | 48 +++++++++++++++++----- 2 files changed, 120 insertions(+), 22 deletions(-) diff --git a/babl/babl-cpuaccel.c b/babl/babl-cpuaccel.c index 6c1f1cc..7d613d9 100644 --- a/babl/babl-cpuaccel.c +++ b/babl/babl-cpuaccel.c @@ -14,11 +14,10 @@ * You should have received a copy of the GNU Lesser General * Public License along with this library; if not, see * . + * + * (c) Manish Singh, Aaron Holtzman, Jan Heller, Ell, Øyvind Kolås */ -/* - * x86 bits Copyright (C) Manish Singh - */ /* * PPC CPU acceleration detection was taken from DirectFB but seems to be @@ -78,7 +77,6 @@ babl_cpu_accel_set_use (gboolean use) #define HAVE_ACCEL 1 - typedef enum { ARCH_X86_VENDOR_NONE, @@ -117,15 +115,26 @@ enum { ARCH_X86_INTEL_FEATURE_PNI = 1 << 0, ARCH_X86_INTEL_FEATURE_SSSE3 = 1 << 9, + ARCH_X86_INTEL_FEATURE_FMA = 1 << 12, ARCH_X86_INTEL_FEATURE_SSE4_1 = 1 << 19, ARCH_X86_INTEL_FEATURE_SSE4_2 = 1 << 20, + ARCH_X86_INTEL_FEATURE_MOVBE = 1 << 22, + ARCH_X86_INTEL_FEATURE_POPCNT = 1 << 23, + ARCH_X86_INTEL_FEATURE_XSAVE = 1 << 26, + ARCH_X86_INTEL_FEATURE_OSXSAVE = 1 << 27, ARCH_X86_INTEL_FEATURE_AVX = 1 << 28, ARCH_X86_INTEL_FEATURE_F16C = 1 << 29, - /* extended features */ - ARCH_X86_INTEL_FEATURE_AVX2 = 1 << 5 + // extended features + + ARCH_X86_INTEL_FEATURE_BMI1 = 1 << 3, + ARCH_X86_INTEL_FEATURE_BMI2 = 1 << 8, + ARCH_X86_INTEL_FEATURE_AVX2 = 1 << 5, }; + +/* x86 asm bit Copyright (C) Manish Singh + */ #if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__)) #define cpuid(op,eax,ebx,ecx,edx) \ __asm__ ("movl %%ebx, %%esi\n\t" \ @@ -256,19 +265,43 @@ arch_accel_intel (void) if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_1) caps |= BABL_CPU_ACCEL_X86_SSE4_1; + if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_2) + caps |= BABL_CPU_ACCEL_X86_SSE4_2; + + if (ecx & ARCH_X86_INTEL_FEATURE_AVX) + caps |= BABL_CPU_ACCEL_X86_AVX; + + if (ecx & ARCH_X86_INTEL_FEATURE_POPCNT) + caps |= BABL_CPU_ACCEL_X86_POPCNT; + + if (ecx & ARCH_X86_INTEL_FEATURE_XSAVE) + caps |= BABL_CPU_ACCEL_X86_XSAVE; + + if (ecx & ARCH_X86_INTEL_FEATURE_OSXSAVE) + caps |= BABL_CPU_ACCEL_X86_OSXSAVE; + + if (ecx & ARCH_X86_INTEL_FEATURE_FMA) + caps |= BABL_CPU_ACCEL_X86_FMA; + if (ecx & ARCH_X86_INTEL_FEATURE_F16C) caps |= BABL_CPU_ACCEL_X86_F16C; - cpuid (0, eax, ebx, ecx, edx); + if (ecx & ARCH_X86_INTEL_FEATURE_MOVBE) + caps |= BABL_CPU_ACCEL_X86_MOVBE; + cpuid (0, eax, ebx, ecx, edx); if (eax >= 7) - { - cpuid (7, eax, ebx, ecx, edx); - - if (ebx & ARCH_X86_INTEL_FEATURE_AVX2) - caps |= BABL_CPU_ACCEL_X86_AVX2; - } + { + cpuid (7, eax, ebx, ecx, edx); + if (ebx & ARCH_X86_INTEL_FEATURE_AVX2) + caps |= BABL_CPU_ACCEL_X86_AVX2; + if (ebx & ARCH_X86_INTEL_FEATURE_BMI1) + caps |= BABL_CPU_ACCEL_X86_BMI1; + if (ebx & ARCH_X86_INTEL_FEATURE_BMI2) + caps |= BABL_CPU_ACCEL_X86_BMI2; + } #endif /* USE_SSE */ + } #endif /* USE_MMX */ @@ -517,6 +550,41 @@ arch_accel (void) #endif /* ARCH_PPC && USE_ALTIVEC */ +#if defined(ARCH_ARM) + +#include +#include +#include +#include + +#define HAVE_ACCEL 1 + +static guint32 +arch_accel (void) +{ + /* TODO : add or hardcode the other ways it can be on arm, where + * this info comes from the system and not from running cpu + * instructions + */ + int has_neon = 0; + int fd = open ("/proc/self/auxv", O_RDONLY); + Elf32_auxv_t auxv; + if (fd >= 0) + { + while (read (fd, &auxv, sizeof (Elf32_auxv_t)) == sizeof (Elf32_auxv_t)) + { + if (auxv.a_type == AT_HWCAP) + { + if (auxv.a_un.a_val & 4096) + has_neon = 1; + } + } + close (fd); + } + return has_neon?GEGL_CPU_ACCEL_ARM_NEON:0; +} + +#endif /* ARCH_ARM */ static BablCpuAccelFlags cpu_accel (void) diff --git a/babl/babl-cpuaccel.h b/babl/babl-cpuaccel.h index b8a6855..133d138 100644 --- a/babl/babl-cpuaccel.h +++ b/babl/babl-cpuaccel.h @@ -24,25 +24,55 @@ typedef enum BABL_CPU_ACCEL_NONE = 0x0, /* x86 accelerations */ - BABL_CPU_ACCEL_X86_MMX = 0x01000000, + BABL_CPU_ACCEL_X86_MMX = 0x80000000, BABL_CPU_ACCEL_X86_3DNOW = 0x40000000, BABL_CPU_ACCEL_X86_MMXEXT = 0x20000000, BABL_CPU_ACCEL_X86_SSE = 0x10000000, BABL_CPU_ACCEL_X86_SSE2 = 0x08000000, - BABL_CPU_ACCEL_X86_SSE3 = 0x02000000, - BABL_CPU_ACCEL_X86_SSSE3 = 0x00800000, - BABL_CPU_ACCEL_X86_SSE4_1 = 0x00400000, - /* BABL_CPU_ACCEL_X86_SSE4_2 = 0x00200000, */ - /* BABL_CPU_ACCEL_X86_AVX = 0x00080000, */ + BABL_CPU_ACCEL_X86_SSE3 = 0x04000000, + BABL_CPU_ACCEL_X86_SSSE3 = 0x02000000, + BABL_CPU_ACCEL_X86_SSE4_1 = 0x01000000, + BABL_CPU_ACCEL_X86_SSE4_2 = 0x00800000, + BABL_CPU_ACCEL_X86_AVX = 0x00400000, + BABL_CPU_ACCEL_X86_POPCNT = 0x00200000, + BABL_CPU_ACCEL_X86_FMA = 0x00100000, + BABL_CPU_ACCEL_X86_MOVBE = 0x00080000, BABL_CPU_ACCEL_X86_F16C = 0x00040000, - BABL_CPU_ACCEL_X86_AVX2 = 0x00020000, + BABL_CPU_ACCEL_X86_XSAVE = 0x00020000, + BABL_CPU_ACCEL_X86_OSXSAVE = 0x00010000, + BABL_CPU_ACCEL_X86_BMI1 = 0x00008000, + BABL_CPU_ACCEL_X86_BMI2 = 0x00004000, + BABL_CPU_ACCEL_X86_AVX2 = 0x00002000, + + BABL_CPU_ACCEL_X86_64_V2 = + (BABL_CPU_ACCEL_X86_POPCNT| + BABL_CPU_ACCEL_X86_SSE4_1| + BABL_CPU_ACCEL_X86_SSE4_2| + BABL_CPU_ACCEL_X86_SSSE3), + + BABL_CPU_ACCEL_X86_64_V3 = + (BABL_CPU_ACCEL_X86_64_V2| + BABL_CPU_ACCEL_X86_BMI1| + BABL_CPU_ACCEL_X86_BMI2| + BABL_CPU_ACCEL_X86_AVX| + BABL_CPU_ACCEL_X86_FMA| + BABL_CPU_ACCEL_X86_F16C| + BABL_CPU_ACCEL_X86_AVX2| + BABL_CPU_ACCEL_X86_OSXSAVE| + BABL_CPU_ACCEL_X86_MOVBE), /* powerpc accelerations */ - BABL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000, - BABL_CPU_ACCEL_X86_64 = 0x00100000 + BABL_CPU_ACCEL_PPC_ALTIVEC = 0x00000010, + + /* arm accelerations */ + BABL_CPU_ACCEL_ARM_NEON = 0x00000020, + + /* x86_64 arch */ + BABL_CPU_ACCEL_X86_64 = 0x00000040 } BablCpuAccelFlags; + BablCpuAccelFlags babl_cpu_accel_get_support (void); void babl_cpu_accel_set_use (unsigned int use); -- 2.30.2