From b4f0b28d0bb14d718660e1663fa48e7acfaf6849 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= Date: Sat, 22 Jan 2022 01:26:17 +0100 Subject: [PATCH] base: x86-64-v2 and x86-64-v3 variants --- babl/babl-extension.c | 26 ++++++++++++++++++- babl/base/babl-base.c | 26 +++++++++---------- babl/base/babl-base.h | 56 +++++++++++++++++++++++++++++------------ babl/base/formats.c | 2 +- babl/base/meson.build | 19 ++++++++++++++ babl/base/model-cmyk.c | 2 +- babl/base/model-gray.c | 2 +- babl/base/model-rgb.c | 2 +- babl/base/model-ycbcr.c | 2 +- babl/base/type-float.c | 2 +- babl/base/type-half.c | 2 +- babl/base/type-u15.c | 2 +- babl/base/type-u16.c | 2 +- babl/base/type-u32.c | 2 +- babl/base/type-u8.c | 2 +- babl/meson.build | 9 +++++++ meson.build | 9 ++++--- 17 files changed, 123 insertions(+), 44 deletions(-) diff --git a/babl/babl-extension.c b/babl/babl-extension.c index 41edb8e..ed0a64b 100644 --- a/babl/babl-extension.c +++ b/babl/babl-extension.c @@ -31,9 +31,31 @@ #include "babl-internal.h" #include "babl-db.h" #include "babl-base.h" + +#include "babl-cpuaccel.h" #include #include +void (*babl_base_init) (void) = babl_base_init_generic; +#ifdef ARCH_X86_64 +void babl_base_init_x86_64_v2 (void); +void babl_base_init_x86_64_v3 (void); +#endif + +static void base_init (void) +{ +#ifdef ARCH_X86_64 + BablCpuAccelFlags accel = babl_cpu_accel_get_support (); + if (accel & BABL_CPU_ACCEL_X86_64_V3) + babl_base_init_x86_64_v3 (); + else if (accel & BABL_CPU_ACCEL_X86_64_V2) + babl_base_init_x86_64_v2 (); + else +#endif + babl_base_init_generic (); +} + + static Babl *babl_extension_current_extender = NULL; Babl * @@ -107,7 +129,9 @@ babl_extension_base (void) if (ret != babl) babl_free (babl); else - babl_base_init (); + { + base_init (); + } babl = ret; } babl_set_extender (NULL); diff --git a/babl/base/babl-base.c b/babl/base/babl-base.c index 1d93341..8b9cdde 100644 --- a/babl/base/babl-base.c +++ b/babl/base/babl-base.c @@ -25,19 +25,19 @@ 
static void types (void); static void models (void); void -babl_base_init (void) +BABL_SIMD_SUFFIX(babl_base_init) (void) { babl_hmpf_on_name_lookups++; types (); models (); - babl_formats_init (); + BABL_SIMD_SUFFIX (babl_formats_init) (); babl_hmpf_on_name_lookups--; } void -babl_base_destroy (void) +BABL_SIMD_SUFFIX(babl_base_destroy) (void) { /* done by the destruction of the elemental babl clases */ } @@ -50,12 +50,12 @@ babl_base_destroy (void) static void types (void) { - babl_base_type_float (); - babl_base_type_u15 (); - babl_base_type_half (); - babl_base_type_u8 (); - babl_base_type_u16 (); - babl_base_type_u32 (); + BABL_SIMD_SUFFIX (babl_base_type_float) (); + BABL_SIMD_SUFFIX (babl_base_type_u15) (); + BABL_SIMD_SUFFIX (babl_base_type_half) (); + BABL_SIMD_SUFFIX (babl_base_type_u8) (); + BABL_SIMD_SUFFIX (babl_base_type_u16) (); + BABL_SIMD_SUFFIX (babl_base_type_u32) (); } /* @@ -67,9 +67,9 @@ static void models (void) { babl_hmpf_on_name_lookups--; - babl_base_model_rgb (); - babl_base_model_gray (); - babl_base_model_cmyk (); + BABL_SIMD_SUFFIX (babl_base_model_rgb) (); + BABL_SIMD_SUFFIX (babl_base_model_gray) (); + BABL_SIMD_SUFFIX (babl_base_model_cmyk) (); babl_hmpf_on_name_lookups++; - babl_base_model_ycbcr (); + BABL_SIMD_SUFFIX (babl_base_model_ycbcr) (); } diff --git a/babl/base/babl-base.h b/babl/base/babl-base.h index 64f1667..67c4a53 100644 --- a/babl/base/babl-base.h +++ b/babl/base/babl-base.h @@ -19,22 +19,46 @@ #ifndef _BABL_BASE_H #define _BABL_BASE_H +#ifdef X86_64_V2 +#define BABL_SIMD_SUFFIX(symbol) symbol##_x86_64_v2 +#else +#ifdef X86_64_V3 +#define BABL_SIMD_SUFFIX(symbol) symbol##_x86_64_v3 +#else +#define BABL_SIMD_SUFFIX(symbol) symbol##_generic +#endif +#endif + +extern void (*babl_base_init) (void); +extern void (*babl_base_destroy) (void); +extern void (*babl_formats_init) (void); + +extern void (*babl_base_type_half) (void); +extern void (*babl_base_type_float) (void); +extern void (*babl_base_type_u8) (void); +extern 
void (*babl_base_type_u16) (void); +extern void (*babl_base_type_u15) (void); +extern void (*babl_base_type_u32) (void); + +extern void (*babl_base_model_rgb) (void); +extern void (*babl_base_model_cmyk) (void); +extern void (*babl_base_model_gray) (void); +extern void (*babl_base_model_ycbcr) (void); + +void BABL_SIMD_SUFFIX(babl_base_init) (void); +void BABL_SIMD_SUFFIX(babl_base_destroy) (void); +void BABL_SIMD_SUFFIX(babl_formats_init) (void); + +void BABL_SIMD_SUFFIX(babl_base_type_half) (void); +void BABL_SIMD_SUFFIX(babl_base_type_float) (void); +void BABL_SIMD_SUFFIX(babl_base_type_u8) (void); +void BABL_SIMD_SUFFIX(babl_base_type_u16) (void); +void BABL_SIMD_SUFFIX(babl_base_type_u15) (void); +void BABL_SIMD_SUFFIX(babl_base_type_u32) (void); -void babl_base_init (void); -void babl_base_destroy (void); -void babl_formats_init (void); - -void babl_base_type_half (void); -void babl_base_type_float (void); -void babl_base_type_u8 (void); -void babl_base_type_u16 (void); -void babl_base_type_u15 (void); -void babl_base_type_u32 (void); - -void babl_base_model_pal (void); -void babl_base_model_rgb (void); -void babl_base_model_cmyk (void); -void babl_base_model_gray (void); -void babl_base_model_ycbcr (void); +void BABL_SIMD_SUFFIX(babl_base_model_rgb) (void); +void BABL_SIMD_SUFFIX(babl_base_model_cmyk) (void); +void BABL_SIMD_SUFFIX(babl_base_model_gray) (void); +void BABL_SIMD_SUFFIX(babl_base_model_ycbcr) (void); #endif diff --git a/babl/base/formats.c b/babl/base/formats.c index bad9d14..bbdedf3 100644 --- a/babl/base/formats.c +++ b/babl/base/formats.c @@ -25,7 +25,7 @@ #include "babl-base.h" void -babl_formats_init (void) +BABL_SIMD_SUFFIX (babl_formats_init) (void) { const Babl *types[]={ babl_type_from_id (BABL_DOUBLE), diff --git a/babl/base/meson.build b/babl/base/meson.build index a78fd84..41287c0 100644 --- a/babl/base/meson.build +++ b/babl/base/meson.build @@ -20,4 +20,23 @@ babl_base = static_library('babl_base', babl_base_sources, 
include_directories: [rootInclude, bablInclude], dependencies: [math, lcms], + c_args: common_c_flags ) + +if host_cpu_family == 'x86_64' + + babl_base_x86_64_v2 = static_library('babl_base-x86-64-v2', + babl_base_sources, + include_directories: [rootInclude, bablInclude], + dependencies: [math, lcms], + c_args: common_c_flags + x86_64_v2_flags + ['-DX86_64_V2'] + ) + + babl_base_x86_64_v3 = static_library('babl_base-x86-64-v3', + babl_base_sources, + include_directories: [rootInclude, bablInclude], + dependencies: [math, lcms], + c_args: common_c_flags + x86_64_v3_flags + ['-DX86_64_V3'] + ) + +endif diff --git a/babl/base/model-cmyk.c b/babl/base/model-cmyk.c index 13fdedf..1fa02be 100644 --- a/babl/base/model-cmyk.c +++ b/babl/base/model-cmyk.c @@ -613,7 +613,7 @@ cmy_to_rgba (const Babl *conversion, #endif void -babl_base_model_cmyk (void) +BABL_SIMD_SUFFIX (babl_base_model_cmyk) (void) { babl_component_new ("cyan", NULL); babl_component_new ("yellow", NULL); diff --git a/babl/base/model-gray.c b/babl/base/model-gray.c index 3862400..9a74096 100644 --- a/babl/base/model-gray.c +++ b/babl/base/model-gray.c @@ -31,7 +31,7 @@ static void formats (void); static void init_single_precision (void); void -babl_base_model_gray (void) +BABL_SIMD_SUFFIX (babl_base_model_gray) (void) { components (); models (); diff --git a/babl/base/model-rgb.c b/babl/base/model-rgb.c index a3064ef..824665a 100644 --- a/babl/base/model-rgb.c +++ b/babl/base/model-rgb.c @@ -32,7 +32,7 @@ static void formats (void); static void init_single_precision (void); void -babl_base_model_rgb (void) +BABL_SIMD_SUFFIX (babl_base_model_rgb) (void) { components (); models (); diff --git a/babl/base/model-ycbcr.c b/babl/base/model-ycbcr.c index 64db6a2..e061298 100644 --- a/babl/base/model-ycbcr.c +++ b/babl/base/model-ycbcr.c @@ -34,7 +34,7 @@ static void conversions (void); static void formats (void); void -babl_base_model_ycbcr (void) +BABL_SIMD_SUFFIX (babl_base_model_ycbcr) (void) { components (); 
models (); diff --git a/babl/base/type-float.c b/babl/base/type-float.c index 5b03b3f..9517831 100644 --- a/babl/base/type-float.c +++ b/babl/base/type-float.c @@ -83,7 +83,7 @@ convert_float_float (const Babl *babl, void -babl_base_type_float (void) +BABL_SIMD_SUFFIX (babl_base_type_float) (void) { babl_type_new ( "float", diff --git a/babl/base/type-half.c b/babl/base/type-half.c index 862d662..a146185 100644 --- a/babl/base/type-half.c +++ b/babl/base/type-half.c @@ -395,7 +395,7 @@ convert_half_float (BablConversion *conversion, void -babl_base_type_half (void) +BABL_SIMD_SUFFIX (babl_base_type_half) (void) { babl_type_new ( "half", diff --git a/babl/base/type-u15.c b/babl/base/type-u15.c index ea35453..7224c63 100644 --- a/babl/base/type-u15.c +++ b/babl/base/type-u15.c @@ -198,7 +198,7 @@ convert_u15_float_scaled (BablConversion *conversion, MAKE_CONVERSIONS_float (u15, 0.0, 1.0, 0, (1<<15)) void -babl_base_type_u15 (void) +BABL_SIMD_SUFFIX (babl_base_type_u15) (void) { babl_hmpf_on_name_lookups--; babl_type_new ( diff --git a/babl/base/type-u16.c b/babl/base/type-u16.c index c5a41dc..e7ab936 100644 --- a/babl/base/type-u16.c +++ b/babl/base/type-u16.c @@ -196,7 +196,7 @@ MAKE_CONVERSIONS_float (u16, 0.0, 1.0, 0, UINT16_MAX) void -babl_base_type_u16 (void) +BABL_SIMD_SUFFIX (babl_base_type_u16) (void) { babl_type_new ( "u16", diff --git a/babl/base/type-u32.c b/babl/base/type-u32.c index 48b1506..288ff2e 100644 --- a/babl/base/type-u32.c +++ b/babl/base/type-u32.c @@ -196,7 +196,7 @@ MAKE_CONVERSIONS_float(u32, 0.0, 1.0, 0, UINT32_MAX) void -babl_base_type_u32 (void) +BABL_SIMD_SUFFIX (babl_base_type_u32) (void) { babl_type_new ( "u32", diff --git a/babl/base/type-u8.c b/babl/base/type-u8.c index d41d5e0..9abbf67 100644 --- a/babl/base/type-u8.c +++ b/babl/base/type-u8.c @@ -202,7 +202,7 @@ MAKE_CONVERSIONS_float (u8_chroma, -0.5, 0.5, 16, 240) void -babl_base_type_u8 (void) +BABL_SIMD_SUFFIX (babl_base_type_u8) (void) { babl_type_new ( "u8", diff --git 
a/babl/meson.build b/babl/meson.build index d19210a..b9b2a05 100644 --- a/babl/meson.build +++ b/babl/meson.build @@ -121,6 +121,14 @@ endif babl_deps = [math, thread, dl, lcms] babl_includes = [rootInclude, bablBaseInclude] +if host_cpu_family == 'x86_64' + simd_extra = [babl_base_x86_64_v2, babl_base_x86_64_v3] +#elif host_cpu_family == 'arm' +# simd_extra = [babl_base_arm_neon] +else + simd_extra = [] +endif + # build library babl = library( lib_name, @@ -129,6 +137,7 @@ babl = library( c_args: babl_c_args, link_whole: babl_base, link_args: babl_link_args, + link_with: simd_extra, dependencies: babl_deps, link_depends: version_script, version: so_version, diff --git a/meson.build b/meson.build index 8c6eebf..bfa5dcf 100644 --- a/meson.build +++ b/meson.build @@ -166,6 +166,7 @@ common_c_flags += cc.get_supported_arguments( ['-fno-unsafe-math-optimizations','-ftree-vectorize'] ) + extra_warnings_list = [ '-Wdeclaration-after-statement', '-Winit-self', @@ -185,13 +186,15 @@ else no_undefined = [] endif -if host_cpu_family == 'arm' +if host_cpu_family == 'x86_64' + x86_64_v2_flags = cc.get_supported_arguments(['-march=x86-64','-msse2', '-msse3','-msse4.1','-msse4.2','-mpopcnt','-mssse3']) + x86_64_v3_flags = x86_64_v2_flags + cc.get_supported_arguments(['-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2']) +elif host_cpu_family == 'arm' arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon']) elif host_cpu_family == 'aarch64' - common_cflags += cc.get_supported_arguments(['-mfpu=neon']) + common_c_flags += cc.get_supported_arguments(['-mfpu=neon']) endif - ################################################################################ # Check for compiler CPU extensions -- 2.30.2