From: Øyvind Kolås Date: Sun, 23 Jan 2022 00:10:09 +0000 (+0100) Subject: multipass build, with SIMD variants for x86_64 and arm X-Git-Tag: archive/raspbian/1%0.1.106-3+rpi1^2~15^2~4^2~61 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=ef3a19a4d96c6701939a37eed1d27fc3d33873ec;p=babl.git multipass build, with SIMD variants for x86_64 and arm Some extensions have their own separate build, so does the RGB color space conversions, trc computations and base set of conversions. --- diff --git a/babl/babl-classes.h b/babl/babl-classes.h index c25172a..cf03447 100644 --- a/babl/babl-classes.h +++ b/babl/babl-classes.h @@ -59,7 +59,7 @@ enum { #include "babl-type.h" #include "babl-sampling.h" -#include "babl-trc.h" +#include "base/babl-trc.h" #include "babl-space.h" #include "babl-component.h" #include "babl-model.h" diff --git a/babl/babl-core.c b/babl/babl-core.c index d78b5e5..2bd36c7 100644 --- a/babl/babl-core.c +++ b/babl/babl-core.c @@ -162,3 +162,184 @@ babl_core_init (void) NULL ); } + + +/////////////////// temporary here +/////////////////// + +const Babl * +babl_trc_lut (const char *name, + int n, + float *entries) +{ + return babl_trc_new (name, BABL_TRC_LUT, 0, n, entries); +} + + +const Babl * +babl_trc_formula_srgb (double g, + double a, + double b, + double c, + double d, + double e, + double f) +{ + char name[128]; + int i; + float params[7]={g, a, b, c, d, e, f}; + + if (fabs (g - 2.400) < 0.01 && + fabs (a - 0.947) < 0.01 && + fabs (b - 0.052) < 0.01 && + fabs (c - 0.077) < 0.01 && + fabs (d - 0.040) < 0.01 && + fabs (e - 0.000) < 0.01 && + fabs (f - 0.000) < 0.01 + ) + return babl_trc ("sRGB"); + + snprintf (name, sizeof (name), "%.6f %.6f %.4f %.4f %.4f %.4f %.4f", g, a, b, c, d, e, f); + for (i = 0; name[i]; i++) + if (name[i] == ',') name[i] = '.'; + while (name[strlen(name)-1]=='0') + name[strlen(name)-1]='\0'; + return babl_trc_new (name, BABL_TRC_FORMULA_SRGB, g, 0, params); +} + +const Babl * +babl_trc_formula_cie (double g, + double 
a, + double b, + double c) +{ + char name[128]; + int i; + float params[4]={g, a, b, c}; + + snprintf (name, sizeof (name), "%.6f %.6f %.4f %.4f", g, a, b, c); + for (i = 0; name[i]; i++) + if (name[i] == ',') name[i] = '.'; + while (name[strlen(name)-1]=='0') + name[strlen(name)-1]='\0'; + return babl_trc_new (name, BABL_TRC_FORMULA_CIE, g, 0, params); +} + + +const Babl * +babl_trc_gamma (double gamma) +{ + char name[32]; + int i; + if (fabs (gamma - 1.0) < 0.01) + return babl_trc_new ("linear", BABL_TRC_LINEAR, 1.0, 0, NULL); + + snprintf (name, sizeof (name), "%.6f", gamma); + for (i = 0; name[i]; i++) + if (name[i] == ',') name[i] = '.'; + while (name[strlen(name)-1]=='0') + name[strlen(name)-1]='\0'; + return babl_trc_new (name, BABL_TRC_FORMULA_GAMMA, gamma, 0, NULL); +} + +void +babl_trc_class_init (void) +{ + babl_trc_new ("sRGB", BABL_TRC_SRGB, 2.2, 0, NULL); + babl_trc_gamma (2.2); + babl_trc_gamma (1.8); + babl_trc_gamma (1.0); + babl_trc_new ("linear", BABL_TRC_LINEAR, 1.0, 0, NULL); +} + +#if 0 +float +babl_trc_from_linear (const Babl *trc_, + float value) +{ + return babl_trc_from_linear (trc_, value); +} + +float +babl_trc_to_linear (const Babl *trc_, + float value) +{ + return babl_trc_to_linear (trc_, value); +} +#endif + +static int +babl_lut_match_gamma (float *lut, + int lut_size, + float gamma) +{ + int match = 1; + int i; + if (lut_size > 1024) + { + for (i = 0; match && i < lut_size; i++) + { + if (fabs (lut[i] - pow ((i / (lut_size-1.0)), gamma)) > 0.0001) + match = 0; + } + } + else + { + for (i = 0; match && i < lut_size; i++) + { + if (fabs (lut[i] - pow ((i / (lut_size-1.0)), gamma)) > 0.001) + match = 0; + } + } + return match; +} + +const Babl * +babl_trc_lut_find (float *lut, + int lut_size) +{ + int i; + int match = 1; + + /* look for linear match */ + for (i = 0; match && i < lut_size; i++) + if (fabs (lut[i] - i / (lut_size-1.0)) > 0.015) + match = 0; + if (match) + return babl_trc_gamma (1.0); + + /* look for sRGB match: */ + 
match = 1; + if (lut_size > 1024) + { + for (i = 0; match && i < lut_size; i++) + { + if (fabs (lut[i] - gamma_2_2_to_linear (i / (lut_size-1.0))) > 0.0001) + match = 0; + } + } + else + { + for (i = 0; match && i < lut_size; i++) + { + if (fabs (lut[i] - gamma_2_2_to_linear (i / (lut_size-1.0))) > 0.001) + match = 0; + } + } + if (match) + return babl_trc ("sRGB"); + + if (babl_lut_match_gamma (lut, lut_size, 2.2)) + return babl_trc_gamma(2.2); + + if (babl_lut_match_gamma (lut, lut_size, 1.8)) + return babl_trc_gamma(1.8); + + return NULL; +} + +const Babl * babl_trc (const char *name) +{ + return babl_trc_lookup_by_name (name); +} + diff --git a/babl/babl-extension.c b/babl/babl-extension.c index ed0a64b..554c764 100644 --- a/babl/babl-extension.c +++ b/babl/babl-extension.c @@ -32,29 +32,9 @@ #include "babl-db.h" #include "babl-base.h" -#include "babl-cpuaccel.h" #include #include -void (*babl_base_init) (void) = babl_base_init_generic; -#ifdef ARCH_X86_64 -void babl_base_init_x86_64_v2 (void); -void babl_base_init_x86_64_v3 (void); -#endif - -static void base_init (void) -{ -#ifdef ARCH_X86_64 - BablCpuAccelFlags accel = babl_cpu_accel_get_support (); - if (accel & BABL_CPU_ACCEL_X86_64_V3) - babl_base_init_x86_64_v3 (); - else if (accel & BABL_CPU_ACCEL_X86_64_V2) - babl_base_init_x86_64_v2 (); - else -#endif - babl_base_init_generic (); -} - static Babl *babl_extension_current_extender = NULL; @@ -130,7 +110,7 @@ babl_extension_base (void) babl_free (babl); else { - base_init (); + babl_base_init (); } babl = ret; } diff --git a/babl/babl-internal.h b/babl/babl-internal.h index 8b8ebd6..ec6008b 100644 --- a/babl/babl-internal.h +++ b/babl/babl-internal.h @@ -363,13 +363,16 @@ void babl_store_db (void); int _babl_max_path_len (void); -const Babl * -babl_trc_new (const char *name, +extern const Babl * +(*babl_trc_new) (const char *name, BablTRCType type, double gamma, int n_lut, float *lut); +extern const Babl * +(*babl_trc_lookup_by_name) (const char *name); 
+ void babl_space_to_xyz (const Babl *space, const double *rgb, double *xyz); void babl_space_from_xyz (const Babl *space, const double *xyz, double *rgb); @@ -384,7 +387,7 @@ const char * babl_conversion_create_name (Babl *source, Babl *destination, int type, int allow_collision); -void _babl_space_add_universal_rgb (const Babl *space); +extern void (*_babl_space_add_universal_rgb) (const Babl *space); const Babl * babl_trc_formula_srgb (double gamma, double a, double b, double c, double d, double e, double f); const Babl * @@ -468,4 +471,7 @@ char *babl_space_to_icc (const Babl *space, Babl * _babl_space_for_lcms (const char *icc_data, int icc_length); // XXX pass profile for dedup? +void +babl_trc_class_init (void); + #endif diff --git a/babl/babl-introspect.c b/babl/babl-introspect.c index 6230f92..00168cc 100644 --- a/babl/babl-introspect.c +++ b/babl/babl-introspect.c @@ -68,7 +68,7 @@ babl_introspect (Babl *babl) babl_conversion_class_for_each (each_introspect, NULL); babl_log (""); babl_log ("trcs:"); - babl_trc_class_for_each (each_introspect, NULL); + //babl_trc_class_for_each (each_introspect, NULL); babl_log (""); babl_log ("spaces:"); babl_space_class_for_each (each_introspect, NULL); diff --git a/babl/babl-matrix.h b/babl/babl-matrix.h index 2107b31..714debc 100644 --- a/babl/babl-matrix.h +++ b/babl/babl-matrix.h @@ -9,7 +9,7 @@ static inline void babl_matrix_mul_matrix (const double *matA_, const double *matB_, double *out) { - int i, j; + unsigned int i, j; double matA[9]; double matB[9]; double t1, t2, t3; @@ -36,7 +36,7 @@ static inline void babl_matrix_mul_matrixf (const float *matA_, const float *matB_, float *out) { - int i, j; + unsigned int i, j; float matA[9]; float matB[9]; float t1, t2, t3; @@ -60,7 +60,7 @@ static inline void babl_matrix_mul_matrixf (const float *matA_, static inline void babl_matrix_to_float (const double *in, float *out) { - int i; + unsigned int i; for (i = 0; i < 9; i ++) out[i] = in[i]; } @@ -141,9 +141,9 @@ static 
inline void babl_matrix_mul_vectorff (const float *mat, const float *v_in } static inline void babl_matrix_mul_vectorff_buf3 (const float *mat, const float *v_in, float *v_out, - int samples) + unsigned int samples) { - int i; + unsigned int i; const float m_0_0 = m(mat, 0, 0); const float m_0_1 = m(mat, 0, 1); const float m_0_2 = m(mat, 0, 2); @@ -166,7 +166,7 @@ static inline void babl_matrix_mul_vectorff_buf3 (const float *mat, const float } static inline void babl_matrix_mul_vectorff_buf4 (const float *mat, const float *v_in, float *v_out, - int samples) + unsigned int samples) { const float m_0_0 = m(mat, 0, 0); const float m_0_1 = m(mat, 0, 1); @@ -177,7 +177,7 @@ static inline void babl_matrix_mul_vectorff_buf4 (const float *mat, const float const float m_2_0 = m(mat, 2, 0); const float m_2_1 = m(mat, 2, 1); const float m_2_2 = m(mat, 2, 2); - int i; + unsigned int i; for (i = 0; i < samples; i ++) { float a = v_in[0], b = v_in[1], c = v_in[2]; @@ -192,9 +192,9 @@ static inline void babl_matrix_mul_vectorff_buf4 (const float *mat, const float } static inline void babl_matrix_mul_vector_buf4 (const double *mat, const double *v_in, double *v_out, - int samples) + unsigned int samples) { - int i; + unsigned int i; const double m_0_0 = m(mat, 0, 0); const double m_0_1 = m(mat, 0, 1); const double m_0_2 = m(mat, 0, 2); diff --git a/babl/babl-space.c b/babl/babl-space.c index 2cb1b27..0923354 100644 --- a/babl/babl-space.c +++ b/babl/babl-space.c @@ -675,650 +675,6 @@ babl_space_get_rgbtoxyz (const Babl *space) return space->space.RGBtoXYZ; } -/////////////////// - - -static void -prep_conversion (const Babl *babl) -{ - Babl *conversion = (void*) babl; - const Babl *source_space = babl_conversion_get_source_space (conversion); - float *matrixf; - int i; - float *lut_red; - float *lut_green; - float *lut_blue; - - double matrix[9]; - babl_matrix_mul_matrix ( - (conversion->conversion.destination)->format.space->space.XYZtoRGB, - 
(conversion->conversion.source)->format.space->space.RGBtoXYZ, - matrix); - - matrixf = babl_calloc (sizeof (float), 9 + 256 * 3); // we leak this matrix , which is a singleton - babl_matrix_to_float (matrix, matrixf); - conversion->conversion.data = matrixf; - - lut_red = matrixf + 9; - lut_green = lut_red + 256; - lut_blue = lut_green + 256; - for (i = 0; i < 256; i++) - { - lut_red[i] = babl_trc_to_linear (source_space->space.trc[0], i/255.0); - lut_green[i] = babl_trc_to_linear (source_space->space.trc[1], i/255.0); - lut_blue[i] = babl_trc_to_linear (source_space->space.trc[2], i/255.0); - } -} - -#define TRC_IN(rgba_in, rgba_out) do{ int i;\ - for (i = 0; i < samples; i++) \ - { \ - rgba_out[i*4+3] = rgba_in[i*4+3]; \ - } \ - if ((source_space->space.trc[0] == source_space->space.trc[1]) && \ - (source_space->space.trc[1] == source_space->space.trc[2])) \ - { \ - const Babl *trc = (void*)source_space->space.trc[0]; \ - babl_trc_to_linear_buf(trc, rgba_in, rgba_out, 4, 4, 3, samples); \ - } \ - else \ - { \ - int c; \ - for (c = 0; c < 3; c ++) \ - { \ - const Babl *trc = (void*)source_space->space.trc[c]; \ - babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); \ - } \ - } \ -}while(0) - -#define TRC_OUT(rgba_in, rgba_out) do{\ - { \ - int c; \ - if ((destination_space->space.trc[0] == destination_space->space.trc[1]) && \ - (destination_space->space.trc[1] == destination_space->space.trc[2])) \ - { \ - const Babl *trc = (void*)destination_space->space.trc[0]; \ - babl_trc_from_linear_buf(trc, rgba_in, rgba_out, 4, 4, 3, samples); \ - } \ - else \ - { \ - for (c = 0; c < 3; c ++) \ - { \ - const Babl *trc = (void*)destination_space->space.trc[c]; \ - babl_trc_from_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); \ - } \ - } \ - }\ -} while(0) - - - - -static inline void -universal_nonlinear_rgba_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const 
Babl *source_space = babl_conversion_get_source_space (conversion); - const Babl *destination_space = babl_conversion_get_destination_space (conversion); - - float * matrixf = data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - TRC_IN(rgba_in, rgba_out); - - babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); - - TRC_OUT(rgba_out, rgba_out); -} - -static inline void -universal_nonlinear_rgb_linear_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl *source_space = babl_conversion_get_source_space (conversion); - float * matrixf = data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - TRC_IN(rgba_in, rgba_out); - - babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); -} - -static inline void -universal_linear_rgb_nonlinear_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl *destination_space = conversion->conversion.destination->format.space; - float * matrixf = data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples); - - TRC_OUT(rgba_out, rgba_out); -} - -static inline void -universal_nonlinear_rgba_u8_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl *destination_space = conversion->conversion.destination->format.space; - - float * matrixf = data; - float * in_trc_lut_red = matrixf + 9; - float * in_trc_lut_green = in_trc_lut_red + 256; - float * in_trc_lut_blue = in_trc_lut_green + 256; - int i; - uint8_t *rgba_in_u8 = (void*)src_char; - uint8_t *rgba_out_u8 = (void*)dst_char; - - float *rgb = babl_malloc (sizeof(float) * 4 * samples); - - for (i = 0; i < samples; i++) - { - rgb[i*4+0]=in_trc_lut_red[rgba_in_u8[i*4+0]]; - 
rgb[i*4+1]=in_trc_lut_green[rgba_in_u8[i*4+1]]; - rgb[i*4+2]=in_trc_lut_blue[rgba_in_u8[i*4+2]]; - rgba_out_u8[i*4+3] = rgba_in_u8[i*4+3]; - } - - babl_matrix_mul_vectorff_buf4 (matrixf, rgb, rgb, samples); - - { - const Babl *from_trc_red = (void*)destination_space->space.trc[0]; - const Babl *from_trc_green = (void*)destination_space->space.trc[1]; - const Babl *from_trc_blue = (void*)destination_space->space.trc[2]; - for (i = 0; i < samples * 4; i+=4) - { - rgba_out_u8[i+0] = babl_trc_from_linear (from_trc_red, rgb[i+0]) * 255.5f; - rgba_out_u8[i+1] = babl_trc_from_linear (from_trc_green, rgb[i+1]) * 255.5f; - rgba_out_u8[i+2] = babl_trc_from_linear (from_trc_blue, rgb[i+2]) * 255.5f; - } - } - babl_free (rgb); -} - - -static inline void -universal_rgba_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - float *matrixf = data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples); -} - -static inline void -universal_rgb_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - float *matrixf = data; - float *rgb_in = (void*)src_char; - float *rgb_out = (void*)dst_char; - - babl_matrix_mul_vectorff_buf3 (matrixf, rgb_in, rgb_out, samples); -} - - -static inline void -universal_ya_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - memcpy (dst_char, src_char, samples * 4 * 2); -} - -static inline void -universal_y_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - memcpy (dst_char, src_char, samples * 4); -} - - -static inline void -universal_nonlinear_rgb_u8_converter (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl 
*destination_space = conversion->conversion.destination->format.space; - - float * matrixf = data; - float * in_trc_lut_red = matrixf + 9; - float * in_trc_lut_green = in_trc_lut_red + 256; - float * in_trc_lut_blue = in_trc_lut_green + 256; - int i; - uint8_t *rgb_in_u8 = (void*)src_char; - uint8_t *rgb_out_u8 = (void*)dst_char; - - float *rgba_out = babl_malloc (sizeof(float) * 4 * samples); - - for (i = 0; i < samples; i++) - { - rgba_out[i*4+0]=in_trc_lut_red[rgb_in_u8[i*3+0]]; - rgba_out[i*4+1]=in_trc_lut_green[rgb_in_u8[i*3+1]]; - rgba_out[i*4+2]=in_trc_lut_blue[rgb_in_u8[i*3+2]]; - rgba_out[i*4+3]=rgb_in_u8[i*3+2] * 255.5f; - } - - babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); - - { - int c; - TRC_OUT(rgba_out, rgba_out); - - for (i = 0; i < samples; i++) - for (c = 0; c < 3; c ++) - rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255.5f; - } - - babl_free (rgba_out); -} - - -#if defined(USE_SSE2) - -#define m(matr, j, i) matr[j*3+i] - -#include - -static inline void babl_matrix_mul_vectorff_buf4_sse2 (const float *mat, - const float *v_in, - float *v_out, - int samples) -{ - const __v4sf m___0 = {m(mat, 0, 0), m(mat, 1, 0), m(mat, 2, 0), 0}; - const __v4sf m___1 = {m(mat, 0, 1), m(mat, 1, 1), m(mat, 2, 1), 0}; - const __v4sf m___2 = {m(mat, 0, 2), m(mat, 1, 2), m(mat, 2, 2), 1}; - int i; - for (i = 0; i < samples; i ++) - { - __v4sf a, b, c = _mm_load_ps(&v_in[0]); - a = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(0,0,0,0)); - b = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(1,1,1,1)); - c = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(3,2,2,2)); - _mm_store_ps (v_out, m___0 * a + m___1 * b + m___2 * c); - v_out += 4; - v_in += 4; - } - _mm_empty (); -} - -#undef m - - -static inline void -universal_nonlinear_rgba_converter_sse2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl *source_space = babl_conversion_get_source_space (conversion); - const 
Babl *destination_space = babl_conversion_get_destination_space (conversion); - float * matrixf = data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - TRC_IN(rgba_in, rgba_out); - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); - - TRC_OUT(rgba_out, rgba_out); -} - - -static inline void -universal_rgba_converter_sse2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - float *matrixf = data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_in, rgba_out, samples); -} - -static inline void -universal_nonlinear_rgba_u8_converter_sse2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl *destination_space = conversion->conversion.destination->format.space; - - float * matrixf = data; - float * in_trc_lut_red = matrixf + 9; - float * in_trc_lut_green = in_trc_lut_red + 256; - float * in_trc_lut_blue = in_trc_lut_green + 256; - int i; - uint8_t *rgba_in_u8 = (void*)src_char; - uint8_t *rgba_out_u8 = (void*)dst_char; - - float *rgba_out = babl_malloc (sizeof(float) * 4 * samples); - - for (i = 0; i < samples * 4; i+= 4) - { - rgba_out[i+0]=in_trc_lut_red[rgba_in_u8[i+0]]; - rgba_out[i+1]=in_trc_lut_green[rgba_in_u8[i+1]]; - rgba_out[i+2]=in_trc_lut_blue[rgba_in_u8[i+2]]; - rgba_out_u8[i+3] = rgba_in_u8[i+3]; - } - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); - - { - int c; - TRC_OUT(rgba_out, rgba_out); - - for (i = 0; i < samples * 4; i+= 4) - for (c = 0; c < 3; c ++) - rgba_out_u8[i+c] = rgba_out[i+c] * 255.5f; - } - - babl_free (rgba_out); -} - -static inline void -universal_nonlinear_rgb_u8_converter_sse2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl *destination_space = 
conversion->conversion.destination->format.space; - - float * matrixf = data; - float * in_trc_lut_red = matrixf + 9; - float * in_trc_lut_green = in_trc_lut_red + 256; - float * in_trc_lut_blue = in_trc_lut_green + 256; - int i; - uint8_t *rgb_in_u8 = (void*)src_char; - uint8_t *rgb_out_u8 = (void*)dst_char; - - float *rgba_out = babl_malloc (sizeof(float) * 4 * samples); - - for (i = 0; i < samples; i++) - { - rgba_out[i*4+0]=in_trc_lut_red[rgb_in_u8[i*3+0]]; - rgba_out[i*4+1]=in_trc_lut_green[rgb_in_u8[i*3+1]]; - rgba_out[i*4+2]=in_trc_lut_blue[rgb_in_u8[i*3+2]]; - } - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); - - { - int c; - TRC_OUT(rgba_out, rgba_out); - - for (i = 0; i < samples; i++) - for (c = 0; c < 3; c ++) - rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255.5f; - } - - babl_free (rgba_out); -} - - -static inline void -universal_nonlinear_rgb_linear_converter_sse2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl *source_space = babl_conversion_get_source_space (conversion); - float * matrixf = data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - TRC_IN(rgba_in, rgba_out); - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); -} - - -static inline void -universal_linear_rgb_nonlinear_converter_sse2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, - long samples, - void *data) -{ - const Babl *destination_space = conversion->conversion.destination->format.space; - float * matrixf = data; - float *rgba_in = (void*)src_char; - float *rgba_out = (void*)dst_char; - - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_in, rgba_out, samples); - - TRC_OUT(rgba_out, rgba_out); -} -#endif - - -static int -add_rgb_adapter (Babl *babl, - void *space) -{ - if (babl != space) - { - -#if defined(USE_SSE2) - if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE) && - 
(babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE2)) - { - - - prep_conversion(babl_conversion_new( - babl_format_with_space("RGBA float", space), - babl_format_with_space("RGBA float", babl), - "linear", universal_rgba_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("RGBA float", babl), - babl_format_with_space("RGBA float", space), - "linear", universal_rgba_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A float", space), - babl_format_with_space("R'G'B'A float", babl), - "linear", universal_nonlinear_rgba_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A float", babl), - babl_format_with_space("R'G'B'A float", space), - "linear", universal_nonlinear_rgba_converter_sse2, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A float", space), - babl_format_with_space("RGBA float", babl), - "linear", universal_nonlinear_rgb_linear_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A float", babl), - babl_format_with_space("RGBA float", space), - "linear", universal_nonlinear_rgb_linear_converter_sse2, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("RGBA float", babl), - babl_format_with_space("R'G'B'A float", space), - "linear", universal_linear_rgb_nonlinear_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("RGBA float", space), - babl_format_with_space("R'G'B'A float", babl), - "linear", universal_linear_rgb_nonlinear_converter_sse2, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A u8", space), - babl_format_with_space("R'G'B'A u8", babl), - "linear", universal_nonlinear_rgba_u8_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A u8", babl), - babl_format_with_space("R'G'B'A u8", space), - 
"linear", universal_nonlinear_rgba_u8_converter_sse2, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B' u8", space), - babl_format_with_space("R'G'B' u8", babl), - "linear", universal_nonlinear_rgb_u8_converter_sse2, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B' u8", babl), - babl_format_with_space("R'G'B' u8", space), - "linear", universal_nonlinear_rgb_u8_converter_sse2, - NULL)); - } - //else -#endif - { - prep_conversion(babl_conversion_new( - babl_format_with_space("RGBA float", space), - babl_format_with_space("RGBA float", babl), - "linear", universal_rgba_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("RGBA float", babl), - babl_format_with_space("RGBA float", space), - "linear", universal_rgba_converter, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A float", space), - babl_format_with_space("R'G'B'A float", babl), - "linear", universal_nonlinear_rgba_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A float", babl), - babl_format_with_space("R'G'B'A float", space), - "linear", universal_nonlinear_rgba_converter, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A float", space), - babl_format_with_space("RGBA float", babl), - "linear", universal_nonlinear_rgb_linear_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A float", babl), - babl_format_with_space("RGBA float", space), - "linear", universal_nonlinear_rgb_linear_converter, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A u8", space), - babl_format_with_space("R'G'B'A u8", babl), - "linear", universal_nonlinear_rgba_u8_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B'A u8", babl), - babl_format_with_space("R'G'B'A u8", space), - "linear", 
universal_nonlinear_rgba_u8_converter, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B' u8", space), - babl_format_with_space("R'G'B' u8", babl), - "linear", universal_nonlinear_rgb_u8_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("R'G'B' u8", babl), - babl_format_with_space("R'G'B' u8", space), - "linear", universal_nonlinear_rgb_u8_converter, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("RGBA float", babl), - babl_format_with_space("R'G'B'A float", space), - "linear", universal_linear_rgb_nonlinear_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("RGBA float", space), - babl_format_with_space("R'G'B'A float", babl), - "linear", universal_linear_rgb_nonlinear_converter, - NULL)); - } - - prep_conversion(babl_conversion_new( - babl_format_with_space("RGB float", space), - babl_format_with_space("RGB float", babl), - "linear", universal_rgb_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("RGB float", babl), - babl_format_with_space("RGB float", space), - "linear", universal_rgb_converter, - NULL)); - - prep_conversion(babl_conversion_new( - babl_format_with_space("Y float", space), - babl_format_with_space("Y float", babl), - "linear", universal_y_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("YaA float", babl), - babl_format_with_space("YaA float", space), - "linear", universal_ya_converter, - NULL)); - prep_conversion(babl_conversion_new( - babl_format_with_space("YA float", babl), - babl_format_with_space("YA float", space), - "linear", universal_ya_converter, - NULL)); - } - return 0; -} - -/* The first time a new Babl space is used - for creation of a fish, is when - * this function is called, it adds conversions hooks that provides its formats - * with conversions internally as well as for conversions to and from other RGB - * spaces. 
- */ -void -_babl_space_add_universal_rgb (const Babl *space) -{ - babl_space_class_for_each (add_rgb_adapter, (void*)space); -} const Babl * diff --git a/babl/babl-trc.c b/babl/babl-trc.c deleted file mode 100644 index d8a69dd..0000000 --- a/babl/babl-trc.c +++ /dev/null @@ -1,708 +0,0 @@ -/* babl - dynamically extendable universal pixel conversion library. - * Copyright (C) 2017 Øyvind Kolås. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General - * Public License along with this library; if not, see - * . 
- */ - -#define MAX_TRCS 100 - -/* FIXME: choose parameters more intelligently */ -#define POLY_GAMMA_X0 ( 0.5 / 255.0) -#define POLY_GAMMA_X1 (254.5 / 255.0) -#define POLY_GAMMA_DEGREE 6 -#define POLY_GAMMA_SCALE 2 - -#include "config.h" -#include "babl-internal.h" -#include "base/util.h" - -static BablTRC trc_db[MAX_TRCS]; - -static inline float -_babl_trc_linear (const Babl *trc_, - float value) -{ - return value; -} - -static inline float -babl_trc_lut_from_linear (const Babl *trc_, - float x) -{ - BablTRC *trc = (void*)trc_; - int entry; - float ret, diff; - - entry = x * (trc->lut_size-1); - diff = ( (x * (trc->lut_size-1)) - entry); - - if (entry >= trc->lut_size -1) - { - entry = trc->lut_size - 1; - diff = 0.0; - } - else if (entry < 0) entry = 0; - - if (diff > 0.0) - { - ret = trc->inv_lut[entry] * (1.0 - diff) + trc->inv_lut[entry+1] * diff; - } - else - { - ret = trc->inv_lut[entry]; - } - return ret; -} - -static inline float -babl_trc_lut_to_linear (const Babl *trc_, - float x) -{ - BablTRC *trc = (void*)trc_; - int entry; - float ret, diff; - - entry = x * (trc->lut_size-1); - diff = ( (x * (trc->lut_size-1)) - entry); - - if (entry >= trc->lut_size) entry = trc->lut_size - 1; - else if (entry < 0) entry = 0; - - if (diff > 0.0 && entry < trc->lut_size - 1) - { - ret = trc->lut[entry] * (1.0 - diff) + trc->lut[entry+1] * diff; - } - else - { - ret = trc->lut[entry]; - } - return ret; -} - -static inline float -_babl_trc_gamma_to_linear (const Babl *trc_, - float value) -{ - BablTRC *trc = (void*)trc_; - if (value >= trc->poly_gamma_to_linear_x0 && - value <= trc->poly_gamma_to_linear_x1) - { - return babl_polynomial_eval (&trc->poly_gamma_to_linear, value); - } - else if (value > 0.0f) - { - return powf (value, trc->gamma); - } - return 0.0f; -} - -static inline float -_babl_trc_gamma_from_linear (const Babl *trc_, - float value) -{ - BablTRC *trc = (void*)trc_; - if (value >= trc->poly_gamma_from_linear_x0 && - value <= 
trc->poly_gamma_from_linear_x1) - { - return babl_polynomial_eval (&trc->poly_gamma_from_linear, value); - } - else if (value > 0.0f) - { - return powf (value, trc->rgamma); - } - return 0.0f; -} - -static inline void -_babl_trc_gamma_to_linear_buf (const Babl *trc_, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count) -{ - int i, c; - for (i = 0; i < count; i ++) - for (c = 0; c < components; c ++) - out[out_gap * i + c] = _babl_trc_gamma_to_linear (trc_, in[in_gap *i + c]); -} - -static inline void -_babl_trc_gamma_from_linear_buf (const Babl *trc_, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count) -{ - int i, c; - for (i = 0; i < count; i ++) - for (c = 0; c < components; c ++) - out[out_gap * i + c] = _babl_trc_gamma_from_linear (trc_, in[in_gap *i + c]); -} - -static inline float -_babl_trc_formula_srgb_from_linear (const Babl *trc_, - float value) -{ - BablTRC *trc = (void*)trc_; - float x= value; - float a = trc->lut[1]; - float b = trc->lut[2]; - float c = trc->lut[3]; - float d = trc->lut[4]; - float e = trc->lut[5]; - float f = trc->lut[6]; - - if (x - f > c * d) // XXX: verify that this math is the correct inverse - { - float v = _babl_trc_gamma_from_linear ((Babl *) trc, x - f); - v = (v-b)/a; - if (v < 0.0 || v >= 0.0) - return v; - return 0.0; - } - if (c > 0.0) - return (x - e) / c; - return 0.0; -} - -static inline float -_babl_trc_formula_srgb_to_linear (const Babl *trc_, - float value) -{ - BablTRC *trc = (void*)trc_; - float x= value; - float a = trc->lut[1]; - float b = trc->lut[2]; - float c = trc->lut[3]; - float d = trc->lut[4]; - float e = trc->lut[5]; - float f = trc->lut[6]; - - if (x >= d) - { - return _babl_trc_gamma_to_linear ((Babl *) trc, a * x + b) + e; - } - return c * x + f; -} -static inline float -_babl_trc_formula_cie_from_linear (const Babl *trc_, - float value) -{ - BablTRC *trc = (void*)trc_; - float x= value; - float a = trc->lut[1]; - float b 
= trc->lut[2]; - float c = trc->lut[3]; - - if (x > c) - { - float v = _babl_trc_gamma_from_linear ((Babl *) trc, x - c); - v = (v-b)/a; - if (v < 0.0 || v >= 0.0) - return v; - } - return 0.0; -} - -static inline float -_babl_trc_formula_cie_to_linear (const Babl *trc_, - float value) -{ - BablTRC *trc = (void*)trc_; - float x= value; - float a = trc->lut[1]; - float b = trc->lut[2]; - float c = trc->lut[3]; - - if (x >= -b / a) - { - return _babl_trc_gamma_to_linear ((Babl *) trc, a * x + b) + c; - } - return c; -} - - - -static inline float -_babl_trc_srgb_to_linear (const Babl *trc_, - float value) -{ - return babl_gamma_2_2_to_linearf (value); -} - -static inline float -_babl_trc_srgb_from_linear (const Babl *trc_, - float value) -{ - return babl_linear_to_gamma_2_2f (value); -} - -static inline void -_babl_trc_srgb_to_linear_buf (const Babl *trc_, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count) -{ - int i, c; - for (i = 0; i < count; i ++) - for (c = 0; c < components; c++) - out[out_gap * i + c] = babl_gamma_2_2_to_linearf (in[in_gap * i + c]); -} - -static inline void -_babl_trc_srgb_from_linear_buf (const Babl *trc_, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count) -{ - int i, c; - for (i = 0; i < count; i ++) - for (c = 0; c < components; c++) - out[out_gap * i + c] = babl_linear_to_gamma_2_2f (in[in_gap * i + c]); -} - -static inline void -_babl_trc_to_linear_buf_generic (const Babl *trc_, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count) -{ - int i, c; - BablTRC *trc = (void*)trc_; - for (i = 0; i < count; i ++) - for (c = 0; c < components; c ++) - out[out_gap * i + c] = trc->fun_to_linear (trc_, in[in_gap * i + c]); -} - -static inline void -_babl_trc_from_linear_buf_generic (const Babl *trc_, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count) -{ - int i, c; - BablTRC *trc = 
(void*)trc_; - for (i = 0; i < count; i ++) - for (c = 0; c < components; c ++) - out[out_gap * i + c] = trc->fun_from_linear (trc_, in[in_gap * i + c]); -} - -static inline void _babl_trc_linear_buf (const Babl *trc_, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count) -{ - int i, c; - for (i = 0; i < count; i ++) - for (c = 0; c < components; c ++) - out[i * out_gap + c] = in[i * in_gap + c]; -} - - -const Babl * -babl_trc (const char *name) -{ - int i; - for (i = 0; trc_db[i].instance.class_type; i++) - if (!strcmp (trc_db[i].instance.name, name)) - { - return (Babl*)&trc_db[i]; - } - babl_log("failed to find trc '%s'\n", name); - return NULL; -} - -const Babl * -babl_trc_new (const char *name, - BablTRCType type, - double gamma, - int n_lut, - float *lut) -{ - int i=0; - static BablTRC trc; - trc.instance.class_type = BABL_TRC; - trc.instance.id = 0; - trc.type = type; - trc.gamma = gamma > 0.0 ? gamma : 0.0; - trc.rgamma = gamma > 0.0001 ? 1.0 / gamma : 0.0; - - if (n_lut ) - { - for (i = 0; trc_db[i].instance.class_type; i++) - { - if ( trc_db[i].lut_size == n_lut && - (memcmp (trc_db[i].lut, lut, sizeof (float) * n_lut)==0) - ) - { - return (void*)&trc_db[i]; - } - } - } - else - for (i = 0; trc_db[i].instance.class_type; i++) - { - int offset = ((char*)&trc_db[i].type) - (char*)(&trc_db[i]); - int size = ((char*)&trc_db[i].gamma + sizeof(double)) - ((char*)&trc_db[i].type); - - if (memcmp ((char*)(&trc_db[i]) + offset, ((char*)&trc) + offset, size)==0) - { - return (void*)&trc_db[i]; - } - } - if (i >= MAX_TRCS-1) - { - babl_log ("too many BablTRCs"); - return NULL; - } - trc_db[i]=trc; - trc_db[i].instance.name = trc_db[i].name; - if (name) - snprintf (trc_db[i].name, sizeof (trc_db[i].name), "%s", name); - else if (n_lut) - snprintf (trc_db[i].name, sizeof (trc_db[i].name), "lut-trc"); - else - snprintf (trc_db[i].name, sizeof (trc_db[i].name), "trc-%i-%f", type, gamma); - - if (n_lut) - { - int j; - 
trc_db[i].lut_size = n_lut; - trc_db[i].lut = babl_calloc (sizeof (float), n_lut); - memcpy (trc_db[i].lut, lut, sizeof (float) * n_lut); - trc_db[i].inv_lut = babl_calloc (sizeof (float), n_lut); - - for (j = 0; j < n_lut; j++) - { - int k; - double min = 0.0; - double max = 1.0; - for (k = 0; k < 16; k++) - { - double guess = (min + max) / 2; - float reversed_index = babl_trc_lut_to_linear (BABL(&trc_db[i]), guess) * (n_lut-1.0); - - if (reversed_index < j) - { - min = guess; - } - else if (reversed_index > j) - { - max = guess; - } - } - trc_db[i].inv_lut[j] = (min + max) / 2; - } - } - - trc_db[i].fun_to_linear_buf = _babl_trc_to_linear_buf_generic; - trc_db[i].fun_from_linear_buf = _babl_trc_from_linear_buf_generic; - - switch (trc_db[i].type) - { - case BABL_TRC_LINEAR: - trc_db[i].fun_to_linear = _babl_trc_linear; - trc_db[i].fun_from_linear = _babl_trc_linear; - trc_db[i].fun_from_linear_buf = _babl_trc_linear_buf; - trc_db[i].fun_to_linear_buf = _babl_trc_linear_buf; - break; - case BABL_TRC_FORMULA_GAMMA: - trc_db[i].fun_to_linear = _babl_trc_gamma_to_linear; - trc_db[i].fun_from_linear = _babl_trc_gamma_from_linear; - trc_db[i].fun_to_linear_buf = _babl_trc_gamma_to_linear_buf; - trc_db[i].fun_from_linear_buf = _babl_trc_gamma_from_linear_buf; - - trc_db[i].poly_gamma_to_linear_x0 = POLY_GAMMA_X0; - trc_db[i].poly_gamma_to_linear_x1 = POLY_GAMMA_X1; - babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_to_linear, - trc_db[i].gamma, - trc_db[i].poly_gamma_to_linear_x0, - trc_db[i].poly_gamma_to_linear_x1, - POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); - - trc_db[i].poly_gamma_from_linear_x0 = POLY_GAMMA_X0; - trc_db[i].poly_gamma_from_linear_x1 = POLY_GAMMA_X1; - babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_from_linear, - trc_db[i].rgamma, - trc_db[i].poly_gamma_from_linear_x0, - trc_db[i].poly_gamma_from_linear_x1, - POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); - break; - case BABL_TRC_FORMULA_CIE: - trc_db[i].lut = babl_calloc (sizeof (float), 4); - 
{ - int j; - for (j = 0; j < 4; j++) - trc_db[i].lut[j] = lut[j]; - } - trc_db[i].fun_to_linear = _babl_trc_formula_cie_to_linear; - trc_db[i].fun_from_linear = _babl_trc_formula_cie_from_linear; - - trc_db[i].poly_gamma_to_linear_x0 = lut[4]; - trc_db[i].poly_gamma_to_linear_x1 = POLY_GAMMA_X1; - babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_to_linear, - trc_db[i].gamma, - trc_db[i].poly_gamma_to_linear_x0, - trc_db[i].poly_gamma_to_linear_x1, - POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); - - trc_db[i].poly_gamma_from_linear_x0 = lut[3] * lut[4]; - trc_db[i].poly_gamma_from_linear_x1 = POLY_GAMMA_X1; - babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_from_linear, - trc_db[i].rgamma, - trc_db[i].poly_gamma_from_linear_x0, - trc_db[i].poly_gamma_from_linear_x1, - POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); - break; - - case BABL_TRC_FORMULA_SRGB: - trc_db[i].lut = babl_calloc (sizeof (float), 7); - { - int j; - for (j = 0; j < 7; j++) - trc_db[i].lut[j] = lut[j]; - } - trc_db[i].fun_to_linear = _babl_trc_formula_srgb_to_linear; - trc_db[i].fun_from_linear = _babl_trc_formula_srgb_from_linear; - - trc_db[i].poly_gamma_to_linear_x0 = lut[4]; - trc_db[i].poly_gamma_to_linear_x1 = POLY_GAMMA_X1; - babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_to_linear, - trc_db[i].gamma, - trc_db[i].poly_gamma_to_linear_x0, - trc_db[i].poly_gamma_to_linear_x1, - POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); - - trc_db[i].poly_gamma_from_linear_x0 = lut[3] * lut[4]; - trc_db[i].poly_gamma_from_linear_x1 = POLY_GAMMA_X1; - babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_from_linear, - trc_db[i].rgamma, - trc_db[i].poly_gamma_from_linear_x0, - trc_db[i].poly_gamma_from_linear_x1, - POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); - break; - case BABL_TRC_SRGB: - trc_db[i].fun_to_linear = _babl_trc_srgb_to_linear; - trc_db[i].fun_from_linear = _babl_trc_srgb_from_linear; - trc_db[i].fun_from_linear_buf = _babl_trc_srgb_from_linear_buf; - trc_db[i].fun_to_linear_buf = 
_babl_trc_srgb_to_linear_buf; - break; - case BABL_TRC_LUT: - trc_db[i].fun_to_linear = babl_trc_lut_to_linear; - trc_db[i].fun_from_linear = babl_trc_lut_from_linear; - break; - } - return (Babl*)&trc_db[i]; -} - -const Babl * -babl_trc_lut (const char *name, - int n, - float *entries) -{ - return babl_trc_new (name, BABL_TRC_LUT, 0, n, entries); -} - -void -babl_trc_class_for_each (BablEachFunction each_fun, - void *user_data) -{ - int i=0; - for (i = 0; trc_db[i].instance.class_type; i++) - if (each_fun (BABL (&trc_db[i]), user_data)) - return; -} - -const Babl * -babl_trc_formula_srgb (double g, - double a, - double b, - double c, - double d, - double e, - double f) -{ - char name[128]; - int i; - float params[7]={g, a, b, c, d, e, f}; - - if (fabs (g - 2.400) < 0.01 && - fabs (a - 0.947) < 0.01 && - fabs (b - 0.052) < 0.01 && - fabs (c - 0.077) < 0.01 && - fabs (d - 0.040) < 0.01 && - fabs (e - 0.000) < 0.01 && - fabs (f - 0.000) < 0.01 - ) - return babl_trc ("sRGB"); - - snprintf (name, sizeof (name), "%.6f %.6f %.4f %.4f %.4f %.4f %.4f", g, a, b, c, d, e, f); - for (i = 0; name[i]; i++) - if (name[i] == ',') name[i] = '.'; - while (name[strlen(name)-1]=='0') - name[strlen(name)-1]='\0'; - return babl_trc_new (name, BABL_TRC_FORMULA_SRGB, g, 0, params); -} - -const Babl * -babl_trc_formula_cie (double g, - double a, - double b, - double c) -{ - char name[128]; - int i; - float params[4]={g, a, b, c}; - - snprintf (name, sizeof (name), "%.6f %.6f %.4f %.4f", g, a, b, c); - for (i = 0; name[i]; i++) - if (name[i] == ',') name[i] = '.'; - while (name[strlen(name)-1]=='0') - name[strlen(name)-1]='\0'; - return babl_trc_new (name, BABL_TRC_FORMULA_CIE, g, 0, params); -} - - -const Babl * -babl_trc_gamma (double gamma) -{ - char name[32]; - int i; - if (fabs (gamma - 1.0) < 0.01) - return babl_trc_new ("linear", BABL_TRC_LINEAR, 1.0, 0, NULL); - - snprintf (name, sizeof (name), "%.6f", gamma); - for (i = 0; name[i]; i++) - if (name[i] == ',') name[i] = '.'; - while 
(name[strlen(name)-1]=='0') - name[strlen(name)-1]='\0'; - return babl_trc_new (name, BABL_TRC_FORMULA_GAMMA, gamma, 0, NULL); -} - -void -babl_trc_class_init (void) -{ - babl_trc_new ("sRGB", BABL_TRC_SRGB, 2.2, 0, NULL); - babl_trc_gamma (2.2); - babl_trc_gamma (1.8); - babl_trc_gamma (1.0); - babl_trc_new ("linear", BABL_TRC_LINEAR, 1.0, 0, NULL); -} - -#if 0 -float -babl_trc_from_linear (const Babl *trc_, - float value) -{ - return babl_trc_from_linear (trc_, value); -} - -float -babl_trc_to_linear (const Babl *trc_, - float value) -{ - return babl_trc_to_linear (trc_, value); -} -#endif - -static int -babl_lut_match_gamma (float *lut, - int lut_size, - float gamma) -{ - int match = 1; - int i; - if (lut_size > 1024) - { - for (i = 0; match && i < lut_size; i++) - { - if (fabs (lut[i] - pow ((i / (lut_size-1.0)), gamma)) > 0.0001) - match = 0; - } - } - else - { - for (i = 0; match && i < lut_size; i++) - { - if (fabs (lut[i] - pow ((i / (lut_size-1.0)), gamma)) > 0.001) - match = 0; - } - } - return match; -} - -const Babl * -babl_trc_lut_find (float *lut, - int lut_size) -{ - int i; - int match = 1; - - /* look for linear match */ - for (i = 0; match && i < lut_size; i++) - if (fabs (lut[i] - i / (lut_size-1.0)) > 0.015) - match = 0; - if (match) - return babl_trc_gamma (1.0); - - /* look for sRGB match: */ - match = 1; - if (lut_size > 1024) - { - for (i = 0; match && i < lut_size; i++) - { - if (fabs (lut[i] - gamma_2_2_to_linear (i / (lut_size-1.0))) > 0.0001) - match = 0; - } - } - else - { - for (i = 0; match && i < lut_size; i++) - { - if (fabs (lut[i] - gamma_2_2_to_linear (i / (lut_size-1.0))) > 0.001) - match = 0; - } - } - if (match) - return babl_trc ("sRGB"); - - if (babl_lut_match_gamma (lut, lut_size, 2.2)) - return babl_trc_gamma(2.2); - - if (babl_lut_match_gamma (lut, lut_size, 1.8)) - return babl_trc_gamma(1.8); - - return NULL; -} - diff --git a/babl/babl-trc.h b/babl/babl-trc.h deleted file mode 100644 index cfe742c..0000000 --- 
a/babl/babl-trc.h +++ /dev/null @@ -1,107 +0,0 @@ -/* babl - dynamically extendable universal pixel conversion library. - * Copyright (C) 2017, Øyvind Kolås and others. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General - * Public License along with this library; if not, see - * <https://www.gnu.org/licenses/>. - */ - -#ifndef _BABL_TRC_H -#define _BABL_TRC_H - -#include -#include -#include "base/util.h" -#include "babl-polynomial.h" - -BABL_CLASS_DECLARE (trc); - -typedef enum {BABL_TRC_LINEAR, - BABL_TRC_FORMULA_GAMMA, - BABL_TRC_SRGB, - BABL_TRC_FORMULA_SRGB, - BABL_TRC_LUT, - BABL_TRC_FORMULA_CIE} -BablTRCType; - -typedef struct -{ - BablInstance instance; - BablTRCType type; - int lut_size; - double gamma; - float rgamma; - float (*fun_to_linear)(const Babl *trc, float val); - float (*fun_from_linear)(const Babl *trc, float val); - - void (*fun_to_linear_buf)(const Babl *trc, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count); - void (*fun_from_linear_buf)(const Babl *trc, - const float *in, - float *out, - int in_gap, - int out_gap, - int components, - int count); - BablPolynomial poly_gamma_to_linear; - float poly_gamma_to_linear_x0; - float poly_gamma_to_linear_x1; - BablPolynomial poly_gamma_from_linear; - float poly_gamma_from_linear_x0; - float poly_gamma_from_linear_x1; - float *lut; - float *inv_lut; - char name[128]; -} BablTRC; - -static inline void babl_trc_from_linear_buf (const Babl *trc_, - const float
*in, float *out, - int in_gap, int out_gap, - int components, - int count) -{ - BablTRC *trc = (void*)trc_; - trc->fun_from_linear_buf (trc_, in, out, in_gap, out_gap, components, count); -} - -static inline void babl_trc_to_linear_buf (const Babl *trc_, - const float *in, float *out, - int in_gap, int out_gap, - int components, - int count) -{ - BablTRC *trc = (void*)trc_; - trc->fun_to_linear_buf (trc_, in, out, in_gap, out_gap, components, count); -} - -static inline float babl_trc_from_linear (const Babl *trc_, float value) -{ - BablTRC *trc = (void*)trc_; - return trc->fun_from_linear (trc_, value); -} - -static inline float babl_trc_to_linear (const Babl *trc_, float value) -{ - BablTRC *trc = (void*)trc_; - return trc->fun_to_linear (trc_, value); -} - -void -babl_trc_class_init (void); - -#endif diff --git a/babl/babl.c b/babl/babl.c index fd90323..515fa09 100644 --- a/babl/babl.c +++ b/babl/babl.c @@ -18,6 +18,7 @@ #include "config.h" #include "babl-internal.h" +#include "babl-base.h" static int ref_count = 0; @@ -125,10 +126,13 @@ babl_dir_list (void) return ret; } + +static void simd_init (void); void babl_init (void) { babl_cpu_accel_set_use (1); + simd_init (); if (ref_count++ == 0) { @@ -190,3 +194,105 @@ babl_model_is (const Babl *babl, return babl && ((babl)==babl_model_with_space(model, babl)); } + +#include "babl-cpuaccel.h" +void (*babl_base_init) (void) = babl_base_init_generic; + +const Babl * babl_trc_lookup_by_name_generic (const char *name); + + +const Babl * +babl_trc_new_generic (const char *name, + BablTRCType type, + double gamma, + int n_lut, + float *lut); + +void _babl_space_add_universal_rgb_generic (const Babl *space); +void (*_babl_space_add_universal_rgb) (const Babl *space) = + _babl_space_add_universal_rgb_generic; + +const Babl * +(*babl_trc_lookup_by_name) (const char *name) = babl_trc_lookup_by_name_generic; +const Babl * +(*babl_trc_new) (const char *name, + BablTRCType type, + double gamma, + int n_lut, + float *lut) = 
babl_trc_new_generic; + +#ifdef ARCH_X86_64 +void babl_base_init_x86_64_v2 (void); +void babl_base_init_x86_64_v3 (void); +void _babl_space_add_universal_rgb_x86_64_v2 (const Babl *space); +void _babl_space_add_universal_rgb_x86_64_v3 (const Babl *space); + +const Babl * +babl_trc_lookup_by_name_x86_64_v2 (const char *name); +const Babl * +babl_trc_lookup_by_name_x86_64_v3 (const char *name); + +const Babl * +babl_trc_new_x86_64_v2 (const char *name, + BablTRCType type, + double gamma, + int n_lut, + float *lut); +const Babl * +babl_trc_new_x86_64_v3 (const char *name, + BablTRCType type, + double gamma, + int n_lut, + float *lut); + +#endif +#ifdef ARCH_ARM +void babl_base_init_arm_neon (void); +void _babl_space_add_universal_rgb_arm_neon (const Babl *space); + +const Babl * +babl_trc_lookup_by_name_arm_neon (const char *name); + +const Babl * +babl_trc_new_arm_neon (const char *name, + BablTRCType type, + double gamma, + int n_lut, + float *lut); + +#endif + +static void simd_init (void) +{ +#ifdef ARCH_X86_64 + BablCpuAccelFlags accel = babl_cpu_accel_get_support (); + if ((accel & BABL_CPU_ACCEL_X86_64_V3) == BABL_CPU_ACCEL_X86_64_V3) + { + babl_base_init = babl_base_init_x86_64_v2; /// !! 
+ // this is correct, + // it performs better + // as observed in benchmarking + babl_trc_new = babl_trc_new_x86_64_v2; + babl_trc_lookup_by_name = babl_trc_lookup_by_name_x86_64_v2; + _babl_space_add_universal_rgb = _babl_space_add_universal_rgb_x86_64_v3; + } + else if ((accel & BABL_CPU_ACCEL_X86_64_V2) == BABL_CPU_ACCEL_X86_64_V2) + { + babl_base_init = babl_base_init_x86_64_v2; + babl_trc_new = babl_trc_new_x86_64_v2; + babl_trc_lookup_by_name = babl_trc_lookup_by_name_x86_64_v2; + _babl_space_add_universal_rgb = _babl_space_add_universal_rgb_x86_64_v2; + } +#endif +#ifdef ARCH_ARM + BablCpuAccelFlags accel = babl_cpu_accel_get_support (); + if ((accel & BABL_CPU_ACCEL_ARM_NEON) == BABL_CPU_ACCEL_ARM_NEON) + { + babl_base_init = babl_base_init_arm_neon; + babl_trc_new = babl_trc_new_arm_neon; + babl_trc_lookup_by_name = babl_trc_lookup_by_name_arm_neon; + _babl_space_add_universal_rgb = _babl_space_add_universal_rgb_arm_neon; + } +#endif +} + diff --git a/babl/base/babl-base.h b/babl/base/babl-base.h index 67c4a53..291697b 100644 --- a/babl/base/babl-base.h +++ b/babl/base/babl-base.h @@ -19,6 +19,9 @@ #ifndef _BABL_BASE_H #define _BABL_BASE_H +#ifdef ARM_NEON +#define BABL_SIMD_SUFFIX(symbol) symbol##_arm_neon +#else #ifdef X86_64_V2 #define BABL_SIMD_SUFFIX(symbol) symbol##_x86_64_v2 #else @@ -28,22 +31,9 @@ #define BABL_SIMD_SUFFIX(symbol) symbol##_generic #endif #endif +#endif extern void (*babl_base_init) (void); -extern void (*babl_base_destroy) (void); -extern void (*babl_formats_init) (void); - -extern void (*babl_base_type_half) (void); -extern void (*babl_base_type_float) (void); -extern void (*babl_base_type_u8) (void); -extern void (*babl_base_type_u16) (void); -extern void (*babl_base_type_u15) (void); -extern void (*babl_base_type_u32) (void); - -extern void (*babl_base_model_rgb) (void); -extern void (*babl_base_model_cmyk) (void); -extern void (*babl_base_model_gray) (void); -extern void (*babl_base_model_ycbcr) (void); void 
BABL_SIMD_SUFFIX(babl_base_init) (void); void BABL_SIMD_SUFFIX(babl_base_destroy) (void); diff --git a/babl/base/meson.build b/babl/base/meson.build index 41287c0..886e1ae 100644 --- a/babl/base/meson.build +++ b/babl/base/meson.build @@ -14,6 +14,8 @@ babl_base_sources = [ 'type-u16.c', 'type-u32.c', 'type-u8.c', + 'babl-trc.c', + 'babl-rgb-converter.c', ] babl_base = static_library('babl_base', @@ -29,14 +31,26 @@ if host_cpu_family == 'x86_64' babl_base_sources, include_directories: [rootInclude, bablInclude], dependencies: [math, lcms], - c_args: common_c_flags + x86_64_v2_flags + '-DX86_64_V2' + c_args: common_c_flags + x86_64_v2_flags ) babl_base_x86_64_v3 = static_library('babl_base-x86-64-v3', babl_base_sources, include_directories: [rootInclude, bablInclude], dependencies: [math, lcms], - c_args: common_c_flags + x86_64_v3_flags + '-DX86_64_V3' + c_args: common_c_flags + x86_64_v3_flags + ) + +endif + + +if host_cpu_family == 'arm' + + babl_base_arm_neon = static_library('babl_base-arm-neon', + babl_base_sources, + include_directories: [rootInclude, bablInclude], + dependencies: [math, lcms], + c_args: common_c_flags + arm_neon_flags ) endif diff --git a/babl/meson.build b/babl/meson.build index b9b2a05..d432dca 100644 --- a/babl/meson.build +++ b/babl/meson.build @@ -89,7 +89,6 @@ babl_sources = files( 'babl-sampling.c', 'babl-sanity.c', 'babl-space.c', - 'babl-trc.c', 'babl-type.c', 'babl-util.c', 'babl-version.c', @@ -123,8 +122,8 @@ babl_includes = [rootInclude, bablBaseInclude] if host_cpu_family == 'x86_64' simd_extra = [babl_base_x86_64_v2, babl_base_x86_64_v3] -#elif host_cpu_family == 'arm' -# simd_extra = [babl_base_arm_neon] +elif host_cpu_family == 'arm' + simd_extra = [babl_base_arm_neon] else simd_extra = [] endif diff --git a/extensions/CIE.c b/extensions/CIE.c index 1607b27..ecf0241 100644 --- a/extensions/CIE.c +++ b/extensions/CIE.c @@ -70,10 +70,12 @@ static void conversions (void); static void formats (void); int init (void); +#include 
"babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); types (); components (); models (); diff --git a/extensions/HSL.c b/extensions/HSL.c index bf48f34..73b2f03 100644 --- a/extensions/HSL.c +++ b/extensions/HSL.c @@ -63,7 +63,6 @@ hue2cpn (double p, int init (void); - int init (void) { diff --git a/extensions/cairo.c b/extensions/cairo.c index 08ccf67..3af4a9b 100644 --- a/extensions/cairo.c +++ b/extensions/cairo.c @@ -28,8 +28,8 @@ int init (void); static void conv_rgba8_cairo24_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -49,8 +49,8 @@ conv_rgba8_cairo24_le (const Babl *conversion, static void conv_rgb8_cairo24_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -71,8 +71,8 @@ conv_rgb8_cairo24_le (const Babl *conversion, #if 0 static void conv_rgbA8_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -93,8 +93,8 @@ conv_rgbA8_cairo32_le (const Babl *conversion, static void conv_rgbA8_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -115,8 +115,8 @@ conv_rgbA8_cairo32_le (const Babl *conversion, static void conv_cairo32_rgbA8_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -136,8 +136,8 @@ conv_cairo32_rgbA8_le (const Babl *conversion, static void conv_cairo32_rgba8_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ 
dst, long samples) { long n = samples; @@ -184,8 +184,8 @@ conv_cairo32_rgba8_le (const Babl *conversion, static void conv_cairo32_rgbAF_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst_char, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst_char, long samples) { long n = samples; @@ -207,8 +207,8 @@ conv_cairo32_rgbAF_le (const Babl *conversion, static void conv_cairo32_rgbaF_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst_char, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst_char, long samples) { long n = samples; @@ -236,8 +236,8 @@ conv_cairo32_rgbaF_le (const Babl *conversion, static void conv_cairo24_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -253,8 +253,8 @@ conv_cairo24_cairo32_le (const Babl *conversion, static void conv_rgba8_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -295,8 +295,8 @@ conv_rgba8_cairo32_le (const Babl *conversion, static void conv_rgb8_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -318,8 +318,8 @@ conv_rgb8_cairo32_le (const Babl *conversion, static void conv_yA8_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -342,8 +342,8 @@ conv_yA8_cairo32_le (const Babl *conversion, static void conv_yA16_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -362,8 +362,8 @@ 
conv_yA16_cairo32_le (const Babl *conversion, static void conv_y8_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -379,8 +379,8 @@ conv_y8_cairo32_le (const Babl *conversion, static void conv_y16_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -400,8 +400,8 @@ conv_y16_cairo32_le (const Babl *conversion, static void conv_rgbA_gamma_float_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float *) src; @@ -424,8 +424,8 @@ conv_rgbA_gamma_float_cairo32_le (const Babl *conversion, static void conv_rgbafloat_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -473,8 +473,8 @@ conv_rgbafloat_cairo32_le (const Babl *conversion, static void conv_yafloat_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -517,8 +517,8 @@ conv_yafloat_cairo32_le (const Babl *conversion, static void conv_yafloat_nl_cairo32_le (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float *) src; @@ -556,6 +556,7 @@ conv_yafloat_nl_cairo32_le (const Babl *conversion, } } +#include "babl-verify-cpu.inc" int init (void) @@ -563,6 +564,7 @@ init (void) int testint = 23; char *testchar = (char*) &testint; int 
littleendian = (testchar[0] == 23); + BABL_VERIFY_CPU(); if (littleendian) { diff --git a/extensions/double.c b/extensions/double.c index fe29cd9..21fc581 100644 --- a/extensions/double.c +++ b/extensions/double.c @@ -222,10 +222,13 @@ conv_rgbD_linear_rgbaD_linear (const Babl *conversion, babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL) int init (void); +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); + { const Babl *rgbaD_linear = babl_format_new ( babl_model ("RGBA"), babl_type ("double"), @@ -288,6 +291,7 @@ init (void) o (rgbaD_linear, rgbD_linear); o (rgbaD_gamma, rgbD_gamma); + } return 0; } diff --git a/extensions/fast-float.c b/extensions/fast-float.c index 8730046..451e20d 100644 --- a/extensions/fast-float.c +++ b/extensions/fast-float.c @@ -589,11 +589,14 @@ conv_rgbF_gamma_rgbF_linear (const Babl *conversion, #define o(src, dst) \ babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL) -int init (void); +#include "babl-verify-cpu.inc" +int init (void); int init (void) { + BABL_VERIFY_CPU(); + { const Babl *yaF_linear = babl_format_new ( babl_model ("YA"), babl_type ("float"), @@ -713,6 +716,7 @@ init (void) o (rgbF_linear, rgbF_gamma); o (rgbF_gamma, rgbF_linear); o (yaF_linear, rgbA8_gamma); + } return 0; } diff --git a/extensions/float.c b/extensions/float.c index cd34421..74b5a8a 100644 --- a/extensions/float.c +++ b/extensions/float.c @@ -31,8 +31,8 @@ static const Babl *trc_srgb = NULL; static void conv_yaF_linear_yAF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float *) src; @@ -52,8 +52,8 @@ conv_yaF_linear_yAF_linear (const Babl *conversion, static void conv_yAF_linear_yaF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float 
*) src; @@ -73,8 +73,8 @@ conv_yAF_linear_yaF_linear (const Babl *conversion, static void conv_yaF_linear_yAF_nonlinear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -96,8 +96,8 @@ conv_yaF_linear_yAF_nonlinear (const Babl *conversion, static void conv_rgbaF_linear_rgbAF_nonlinear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -121,8 +121,8 @@ conv_rgbaF_linear_rgbAF_nonlinear (const Babl *conversion, static void conv_rgbaF_linear_rgbAF_perceptual (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float *) src; @@ -144,8 +144,8 @@ conv_rgbaF_linear_rgbAF_perceptual (const Babl *conversion, static void conv_rgbAF_linear_rgbAF_nonlinear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -180,8 +180,8 @@ conv_rgbAF_linear_rgbAF_nonlinear (const Babl *conversion, static void conv_yAF_linear_yAF_nonlinear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -215,8 +215,8 @@ conv_yAF_linear_yAF_nonlinear (const Babl *conversion, static void conv_rgbAF_linear_rgbAF_perceptual (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { @@ -249,8 +249,8 
@@ conv_rgbAF_linear_rgbAF_perceptual (const Babl *conversion, static void conv_yaF_linear_yaF_nonlinear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -258,19 +258,13 @@ conv_yaF_linear_yaF_nonlinear (const Babl *conversion, float *fsrc = (float *) src; float *fdst = (float *) dst; - int n = samples; - - while (n--) - { - *fdst++ = babl_trc_from_linear (trc[0], *fsrc++); - *fdst++ = *fsrc++; - } + babl_trc_from_linear_buf (trc[0], fsrc, fdst, 2, 2, 1, samples); } static void conv_rgbaF_linear_rgbaF_nonlinear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -291,46 +285,33 @@ conv_rgbaF_linear_rgbaF_nonlinear (const Babl *conversion, static void conv_rgbaF_linear_rgbaF_perceptual (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float *) src; float *fdst = (float *) dst; - int n = samples; - - while (n--) - { - *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++); - *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++); - *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++); - *fdst++ = *fsrc++; - } + babl_trc_from_linear_buf (trc_srgb, fsrc, fdst, 4, 4, 3, samples); } static void conv_yF_linear_yF_nonlinear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); const Babl **trc = (void*)space->space.trc; float *fsrc = (float *) src; float *fdst = (float *) dst; - int n = samples; - - while (n--) - { - *fdst++ = 
babl_trc_from_linear (trc[0], *fsrc++); - } + babl_trc_from_linear_buf (trc[0], fsrc, fdst, 1, 1, 1, samples); } static void conv_rgbF_linear_rgbF_nonlinear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -349,26 +330,19 @@ conv_rgbF_linear_rgbF_nonlinear (const Babl *conversion, static void conv_rgbF_linear_rgbF_perceptual (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float *) src; float *fdst = (float *) dst; - int n = samples; - - while (n--) - { - *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++); - *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++); - *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++); - } + babl_trc_from_linear_buf (trc_srgb, fsrc, fdst, 3, 3, 3, samples); } static void conv_rgbaF_nonlinear_rgbaF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -389,48 +363,34 @@ conv_rgbaF_nonlinear_rgbaF_linear (const Babl *conversion, static void conv_yaF_nonlinear_yaF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); const Babl **trc = (void*)space->space.trc; float *fsrc = (float *) src; float *fdst = (float *) dst; - int n = samples; - - while (n--) - { - *fdst++ = babl_trc_to_linear (trc[0], *fsrc++); - *fdst++ = *fsrc++; - } + babl_trc_to_linear_buf (trc[0], fsrc, fdst, 2, 2, 1, samples); } static void conv_rgbaF_perceptual_rgbaF_linear (const Babl *conversion, - unsigned char *src, - 
unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float *) src; float *fdst = (float *) dst; - int n = samples; - - while (n--) - { - *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++); - *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++); - *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++); - *fdst++ = *fsrc++; - } + babl_trc_to_linear_buf (trc_srgb, fsrc, fdst, 4, 4, 3, samples); } static void conv_rgbF_nonlinear_rgbF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); @@ -450,38 +410,27 @@ conv_rgbF_nonlinear_rgbF_linear (const Babl *conversion, static void conv_yF_nonlinear_yF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_destination_space (conversion); const Babl **trc = (void*)space->space.trc; float *fsrc = (float *) src; float *fdst = (float *) dst; - int n = samples; - while (n--) - { - *fdst++ = babl_trc_to_linear (trc[0], *fsrc++); - } + babl_trc_to_linear_buf (trc[0], fsrc, fdst, 1, 1, 1, samples); } static void conv_rgbF_perceptual_rgbF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *fsrc = (float *) src; float *fdst = (float *) dst; - int n = samples; - - while (n--) - { - *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++); - *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++); - *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++); - } + babl_trc_to_linear_buf (trc_srgb, fsrc, fdst, 3, 3, 3, samples); } @@ -489,10 +438,13 @@ conv_rgbF_perceptual_rgbF_linear (const Babl *conversion, babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## 
dst, NULL) int init (void); +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); + { const Babl *yaF_linear = babl_format_new ( babl_model ("YA"), babl_type ("float"), @@ -621,7 +573,7 @@ init (void) o (rgbaF_perceptual, rgbaF_linear); o (rgbF_linear, rgbF_perceptual); o (rgbF_perceptual, rgbF_linear); - + } return 0; } diff --git a/extensions/gegl-fixups.c b/extensions/gegl-fixups.c index 45888ce..881b785 100644 --- a/extensions/gegl-fixups.c +++ b/extensions/gegl-fixups.c @@ -524,10 +524,13 @@ conv_rgba8_rgb8 (const Babl *conversion, #define conv_gamma_rgbAF_gamma_rgbaF conv_rgbAF_rgbaF int init (void); +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); + { const Babl *rgbaF = babl_format_new ( babl_model ("RGBA"), babl_type ("float"), @@ -621,5 +624,6 @@ init (void) o (rgba8, rgb8); o (ga8, rgbaF); + } return 0; } diff --git a/extensions/gggl-lies.c b/extensions/gggl-lies.c index 09c4a90..4a2e9cd 100644 --- a/extensions/gggl-lies.c +++ b/extensions/gggl-lies.c @@ -50,8 +50,8 @@ static void conv_F_8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -78,8 +78,8 @@ conv_F_8 (const Babl *conversion, static void conv_F_16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -108,8 +108,8 @@ conv_F_16 (const Babl *conversion, static void conv_8_F (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -124,8 +124,8 @@ conv_8_F (const Babl *conversion, static void conv_16_F (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -140,8 +140,8 @@ 
conv_16_F (const Babl *conversion, static void conv_F_D (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -156,8 +156,8 @@ conv_F_D (const Babl *conversion, static void conv_D_F (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -172,8 +172,8 @@ conv_D_F (const Babl *conversion, static void conv_16_8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -200,8 +200,8 @@ conv_16_8 (const Babl *conversion, static void conv_8_16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -217,8 +217,8 @@ conv_8_16 (const Babl *conversion, /*********/ static void conv_rgbaF_rgba8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_8 (conversion, src, dst, samples * 4); @@ -228,8 +228,8 @@ conv_rgbaF_rgba8 (const Babl *conversion, static void conv_rgbF_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_8 (conversion, src, dst, samples * 3); @@ -237,8 +237,8 @@ conv_rgbF_rgb8 (const Babl *conversion, static void conv_gaF_ga8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_8 (conversion, src, dst, samples * 2); @@ -250,8 +250,8 @@ conv_gaF_ga8 (const Babl *conversion, static void conv_rgbaF_rgba16 (const Babl *conversion, - unsigned char *src, - unsigned char 
*dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_16 (conversion, src, dst, samples * 4); @@ -259,8 +259,8 @@ conv_rgbaF_rgba16 (const Babl *conversion, static void conv_rgbaF_rgbaD (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_D (conversion, src, dst, samples * 4); @@ -268,8 +268,8 @@ conv_rgbaF_rgbaD (const Babl *conversion, static void conv_rgbaD_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_D_F (conversion, src, dst, samples * 4); @@ -277,8 +277,8 @@ conv_rgbaD_rgbaF (const Babl *conversion, static void conv_rgbF_rgb16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_16 (conversion, src, dst, samples * 3); @@ -286,8 +286,8 @@ conv_rgbF_rgb16 (const Babl *conversion, static void conv_gaF_ga16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_16 (conversion, src, dst, samples * 2); @@ -301,8 +301,8 @@ conv_gaF_ga16 (const Babl *conversion, static void conv_rgba8_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_F (conversion, src, dst, samples * 4); @@ -310,8 +310,8 @@ conv_rgba8_rgbaF (const Babl *conversion, static void conv_rgb8_rgbF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_F (conversion, src, dst, samples * 3); @@ -319,8 +319,8 @@ conv_rgb8_rgbF (const Babl *conversion, static void conv_ga8_gaF (const Babl *conversion, - 
unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_F (conversion, src, dst, samples * 2); @@ -332,8 +332,8 @@ conv_ga8_gaF (const Babl *conversion, static void conv_rgba16_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_F (conversion, src, dst, samples * 4); @@ -341,8 +341,8 @@ conv_rgba16_rgbaF (const Babl *conversion, static void conv_rgb16_rgbF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_F (conversion, src, dst, samples * 3); @@ -350,8 +350,8 @@ conv_rgb16_rgbF (const Babl *conversion, static void conv_ga16_gaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_F (conversion, src, dst, samples * 2); @@ -363,8 +363,8 @@ conv_ga16_gaF (const Babl *conversion, static void conv_rgba16_rgba8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_8 (conversion, src, dst, samples * 4); @@ -372,8 +372,8 @@ conv_rgba16_rgba8 (const Babl *conversion, static void conv_rgb16_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_8 (conversion, src, dst, samples * 3); @@ -381,8 +381,8 @@ conv_rgb16_rgb8 (const Babl *conversion, static void conv_ga16_ga8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_8 (conversion, src, dst, samples * 2); @@ -394,8 +394,8 @@ conv_ga16_ga8 (const Babl *conversion, static void 
conv_rgba8_rgba16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_16 (conversion, src, dst, samples * 4); @@ -403,8 +403,8 @@ conv_rgba8_rgba16 (const Babl *conversion, static void conv_rgb8_rgb16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_16 (conversion, src, dst, samples * 3); @@ -412,8 +412,8 @@ conv_rgb8_rgb16 (const Babl *conversion, static void conv_ga8_ga16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_16 (conversion, src, dst, samples * 2); @@ -427,8 +427,8 @@ conv_ga8_ga16 (const Babl *conversion, static void conv_gaF_gAF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -448,8 +448,8 @@ conv_gaF_gAF (const Babl *conversion, static void conv_gAF_gaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -474,8 +474,8 @@ conv_gAF_gaF (const Babl *conversion, static void conv_rgbaF_rgbF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -498,8 +498,8 @@ conv_rgbaF_rgbF (const Babl *conversion, static void conv_gF_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -522,8 +522,8 @@ conv_gF_rgbaF (const Babl *conversion, static void conv_rgbF_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + 
unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -546,8 +546,8 @@ conv_rgbF_rgbaF (const Babl *conversion, static void conv_gaF_gF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -563,8 +563,8 @@ conv_gaF_gF (const Babl *conversion, static void conv_gF_gaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -587,8 +587,8 @@ conv_gF_gaF (const Babl *conversion, static void conv_gF_rgbF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -608,8 +608,8 @@ conv_gF_rgbF (const Babl *conversion, static void conv_gaF_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -636,8 +636,8 @@ conv_gaF_rgbaF (const Babl *conversion, static void conv_rgbaF_rgbA8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -661,8 +661,8 @@ conv_rgbaF_rgbA8 (const Babl *conversion, static void conv_rgbaF_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -683,8 +683,8 @@ conv_rgbaF_rgb8 (const Babl *conversion, static void conv_rgbaF_rgb16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -705,8 +705,8 @@ conv_rgbaF_rgb16 (const Babl *conversion, static void 
conv_rgba8_rgbA8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -736,8 +736,8 @@ conv_rgba8_rgbA8 (const Babl *conversion, static void conv_rgbA8_rgba8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -768,8 +768,8 @@ conv_rgbA8_rgba8 (const Babl *conversion, static void conv_rgb8_rgba8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -790,8 +790,8 @@ conv_rgb8_rgba8 (const Babl *conversion, static void conv_rgba8_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -807,10 +807,13 @@ conv_rgba8_rgb8 (const Babl *conversion, } int init (void); +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); + { const Babl *rgbaF = babl_format_new ( babl_model ("RGBA"), babl_type ("float"), @@ -1012,6 +1015,6 @@ init (void) o (rgb8, rgbA8); o (rgba8, rgb8); o (rgbaF, rgbA8); - + } return 0; } diff --git a/extensions/gggl.c b/extensions/gggl.c index 34068f1..06843ea 100644 --- a/extensions/gggl.c +++ b/extensions/gggl.c @@ -30,6 +30,7 @@ #include "babl.h" #include "extensions/util.h" +#include "babl-verify-cpu.inc" /* * Implemented according to information read from: @@ -51,8 +52,8 @@ static void conv_F_8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -73,8 +74,8 @@ conv_F_8 (const Babl *conversion, static void conv_F_16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned 
char *__restrict__ dst, long samples) { long n = samples; @@ -101,8 +102,8 @@ conv_F_16 (const Babl *conversion, static void conv_8_F (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -117,8 +118,8 @@ conv_8_F (const Babl *conversion, static void conv_16_F (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -133,8 +134,8 @@ conv_16_F (const Babl *conversion, static void conv_rgbaF_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -161,8 +162,8 @@ conv_rgbaF_rgb8 (const Babl *conversion, static void conv_F_D (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -177,8 +178,8 @@ conv_F_D (const Babl *conversion, static void conv_D_F (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -193,8 +194,8 @@ conv_D_F (const Babl *conversion, static void conv_16_8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -221,8 +222,8 @@ conv_16_8 (const Babl *conversion, static inline void conv_8_16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -238,8 +239,8 @@ conv_8_16 (const Babl *conversion, /*********/ static void conv_rgbaF_rgba8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned 
char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_8 (conversion, src, dst, samples * 4); @@ -247,8 +248,8 @@ conv_rgbaF_rgba8 (const Babl *conversion, static void conv_rgbF_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_8 (conversion, src, dst, samples * 3); @@ -256,8 +257,8 @@ conv_rgbF_rgb8 (const Babl *conversion, static void conv_gaF_ga8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_8 (conversion, src, dst, samples * 2); @@ -269,8 +270,8 @@ conv_gaF_ga8 (const Babl *conversion, static void conv_rgbaF_rgba16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_16 (conversion, src, dst, samples * 4); @@ -278,8 +279,8 @@ conv_rgbaF_rgba16 (const Babl *conversion, static void conv_rgbF_rgb16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_16 (conversion, src, dst, samples * 3); @@ -287,8 +288,8 @@ conv_rgbF_rgb16 (const Babl *conversion, static void conv_gaF_ga16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_16 (conversion, src, dst, samples * 2); @@ -300,8 +301,8 @@ conv_gaF_ga16 (const Babl *conversion, static void conv_rgba8_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_F (conversion, src, dst, samples * 4); @@ -310,8 +311,8 @@ conv_rgba8_rgbaF (const Babl *conversion, static void conv_rgb8_rgbF (const Babl *conversion, - unsigned char *src, - 
unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_F (conversion, src, dst, samples * 3); @@ -319,8 +320,8 @@ conv_rgb8_rgbF (const Babl *conversion, static void conv_ga8_gaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_F (conversion, src, dst, samples * 2); @@ -332,8 +333,8 @@ conv_ga8_gaF (const Babl *conversion, static void conv_rgbaF_rgbaD (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_F_D (conversion, src, dst, samples * 4); @@ -341,8 +342,8 @@ conv_rgbaF_rgbaD (const Babl *conversion, static void conv_rgbaD_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_D_F (conversion, src, dst, samples * 4); @@ -350,8 +351,8 @@ conv_rgbaD_rgbaF (const Babl *conversion, static void conv_rgba16_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_F (conversion, src, dst, samples * 4); @@ -359,8 +360,8 @@ conv_rgba16_rgbaF (const Babl *conversion, static void conv_rgb16_rgbF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_F (conversion, src, dst, samples * 3); @@ -368,8 +369,8 @@ conv_rgb16_rgbF (const Babl *conversion, static void conv_ga16_gaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_F (conversion, src, dst, samples * 2); @@ -381,8 +382,8 @@ conv_ga16_gaF (const Babl *conversion, static void conv_rgba16_rgba8 (const Babl 
*conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_8 (conversion, src, dst, samples * 4); @@ -390,8 +391,8 @@ conv_rgba16_rgba8 (const Babl *conversion, static void conv_rgb16_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_8 (conversion, src, dst, samples * 3); @@ -399,8 +400,8 @@ conv_rgb16_rgb8 (const Babl *conversion, static void conv_ga16_ga8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_16_8 (conversion, src, dst, samples * 2); @@ -412,8 +413,8 @@ conv_ga16_ga8 (const Babl *conversion, static void conv_rgba8_rgba16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_16 (conversion, src, dst, samples * 4); @@ -421,8 +422,8 @@ conv_rgba8_rgba16 (const Babl *conversion, static void conv_rgb8_rgb16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_16 (conversion, src, dst, samples * 3); @@ -430,8 +431,8 @@ conv_rgb8_rgb16 (const Babl *conversion, static void conv_ga8_ga16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { conv_8_16 (conversion, src, dst, samples * 2); @@ -445,8 +446,8 @@ conv_ga8_ga16 (const Babl *conversion, static void conv_gaF_gAF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -466,8 +467,8 @@ conv_gaF_gAF (const Babl *conversion, static void conv_gAF_gaF (const Babl 
*conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -492,8 +493,8 @@ conv_gAF_gaF (const Babl *conversion, static void conv_rgbaF_rgbF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -515,8 +516,8 @@ conv_rgbaF_rgbF (const Babl *conversion, static void conv_rgbF_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -537,8 +538,8 @@ conv_rgbF_rgbaF (const Babl *conversion, static void conv_gaF_gF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -554,8 +555,8 @@ conv_gaF_gF (const Babl *conversion, static void conv_gF_gaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -578,8 +579,8 @@ conv_gF_gaF (const Babl *conversion, static void conv_gF_rgbF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -599,8 +600,8 @@ conv_gF_rgbF (const Babl *conversion, static void conv_g8_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -617,8 +618,8 @@ conv_g8_rgb8 (const Babl *conversion, #define conv_g8_rgbA8 conv_g8_rgba8 static void conv_g8_rgba8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; 
@@ -636,8 +637,8 @@ conv_g8_rgba8 (const Babl *conversion, static void conv_gaF_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -665,8 +666,8 @@ conv_gaF_rgbaF (const Babl *conversion, static void conv_rgbaF_rgbA8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -690,8 +691,8 @@ conv_rgbaF_rgbA8 (const Babl *conversion, static void conv_rgbaF_rgb16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -717,8 +718,8 @@ conv_rgbaF_rgb16 (const Babl *conversion, static void conv_rgbA16_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -748,8 +749,8 @@ conv_rgbA16_rgbaF (const Babl *conversion, static void conv_gF_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -772,8 +773,8 @@ conv_gF_rgbaF (const Babl *conversion, /* static void - conv_rgb8_rgbaF (unsigned char *src, - unsigned char *dst, + conv_rgb8_rgbaF (unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, int samples) { long n=samples; @@ -791,8 +792,8 @@ conv_gF_rgbaF (const Babl *conversion, } static void - conv_g8_rgbaF (unsigned char *src, - unsigned char *dst, + conv_g8_rgbaF (unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, int samples) { long n=samples; @@ -810,8 +811,8 @@ conv_gF_rgbaF (const Babl *conversion, } static void - conv_rgb16_rgbaF (unsigned char *src, - unsigned char *dst, + conv_rgb16_rgbaF (unsigned char *__restrict__ 
src, + unsigned char *__restrict__ dst, int samples) { long n=samples; @@ -830,8 +831,8 @@ conv_gF_rgbaF (const Babl *conversion, } static void - conv_gF_rgbaF (unsigned char *src, - unsigned char *dst, + conv_gF_rgbaF (unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, int samples) { long n=samples; @@ -851,8 +852,8 @@ conv_gF_rgbaF (const Babl *conversion, */ static void conv_rgba8_rgbA8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -882,8 +883,8 @@ conv_rgba8_rgbA8 (const Babl *conversion, static void conv_rgbA8_rgba8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -917,8 +918,8 @@ conv_rgbA8_rgba8 (const Babl *conversion, static void conv_rgb8_rgba8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples-1; @@ -938,8 +939,8 @@ conv_rgb8_rgba8 (const Babl *conversion, static void conv_rgba8_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -956,8 +957,8 @@ conv_rgba8_rgb8 (const Babl *conversion, static void conv_rgbA8_rgb8 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { long n = samples; @@ -1014,8 +1015,8 @@ conv_rgbA8_rgb8 (const Babl *conversion, static void conv_yuvaF_rgbaF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *src_f = (float *) src; @@ -1048,8 +1049,8 @@ conv_yuvaF_rgbaF (const Babl *conversion, static void 
conv_yuvF_rgbF (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *src_f = (float *) src; @@ -1083,6 +1084,8 @@ int init (void); int init (void) { + BABL_VERIFY_CPU(); + { const Babl *rgbaD = babl_format_new ( babl_model ("R'G'B'A"), babl_type ("double"), @@ -1298,5 +1301,6 @@ init (void) o (rgbaF, rgbaD); o (rgbaD, rgbaF); + } return 0; } diff --git a/extensions/grey.c b/extensions/grey.c index 3f20842..416e3dc 100644 --- a/extensions/grey.c +++ b/extensions/grey.c @@ -26,8 +26,8 @@ static void conv_rgbaF_linear_y8_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_source_space (conversion); @@ -55,8 +55,8 @@ conv_rgbaF_linear_y8_linear (const Babl *conversion, static void conv_rgbaF_linear_yF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_source_space (conversion); @@ -82,8 +82,8 @@ conv_rgbaF_linear_yF_linear (const Babl *conversion, static void conv_rgbaF_linear_yaF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { const Babl *space = babl_conversion_get_source_space (conversion); @@ -109,8 +109,8 @@ conv_rgbaF_linear_yaF_linear (const Babl *conversion, static void conv_yaF_linear_rgbaF_linear (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) { float *s = (float *) src; @@ -130,10 +130,13 @@ conv_yaF_linear_rgbaF_linear (const Babl *conversion, int init (void); +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); + { 
babl_conversion_new (babl_format ("RGBA float"), babl_format ("Y u8"), "linear", @@ -171,5 +174,6 @@ init (void) conv_yaF_linear_rgbaF_linear, NULL); + } return 0; } diff --git a/extensions/half.c b/extensions/half.c index f308e03..789caf6 100644 --- a/extensions/half.c +++ b/extensions/half.c @@ -412,10 +412,13 @@ conv2_rgbaF_rgbaHalf (const Babl *conversion, #define conv_yAHalf_yAF conv_yaHalf_yaF int init (void); +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); + { int i; const Babl *rgbaF_linear = babl_format_new ( babl_model ("RGBA"), @@ -617,5 +620,6 @@ init (void) CONV2(yaF, yaHalf); CONV2(yF, yHalf); + } return 0; } diff --git a/extensions/meson.build b/extensions/meson.build index df75a23..fe7dcc0 100644 --- a/extensions/meson.build +++ b/extensions/meson.build @@ -21,6 +21,20 @@ if platform_win32 babl_ext_link_args += no_undefined endif +autosimd_extensions = [ + ['u16', no_cflags], + ['u32', no_cflags], + ['cairo', no_cflags], + ['grey', no_cflags], + ['gggl', no_cflags], + ['gggl-lies', no_cflags], + ['gegl-fixups', no_cflags], + ['CIE', sse2_cflags], + ['float', no_cflags], + ['double', no_cflags], + ['simple', no_cflags], + ['ycbcr', no_cflags], +] extensions = [ ['u16', no_cflags], @@ -68,3 +82,37 @@ foreach ext : extensions install_dir: babl_libdir / lib_name, ) endforeach + +if host_cpu_family == 'x86_64' + + foreach ext : autosimd_extensions + shared_library( + 'x86-64-v2-' + ext[0], + ext[0] + '.c', + c_args: [ext[1]] + x86_64_v2_flags, + include_directories: babl_ext_inc, + link_with: babl, + link_args: babl_ext_link_args, + dependencies: babl_ext_dep, + name_prefix: '', + install: true, + install_dir: babl_libdir / lib_name, + ) + endforeach + + foreach ext : autosimd_extensions + shared_library( + 'x86-64-v3-' + ext[0], + ext[0] + '.c', + c_args: [ext[1]] + x86_64_v3_flags, + include_directories: babl_ext_inc, + link_with: babl, + link_args: babl_ext_link_args, + dependencies: babl_ext_dep, + name_prefix: '', + install: 
true, + install_dir: babl_libdir / lib_name, + ) + endforeach + +endif diff --git a/extensions/simple.c b/extensions/simple.c index 627247a..de5999c 100644 --- a/extensions/simple.c +++ b/extensions/simple.c @@ -7,8 +7,8 @@ int init (void); static inline void float_to_u8_x1 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst, long samples) { float *src = (float *)src_char; @@ -24,8 +24,8 @@ float_to_u8_x1 (const Babl *conversion, static inline void float_to_u8_x4 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst, long samples) { float_to_u8_x1 (conversion, src_char, dst, samples * 4); @@ -33,8 +33,8 @@ float_to_u8_x4 (const Babl *conversion, static inline void float_to_u8_x3 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst, long samples) { float_to_u8_x1 (conversion, src_char, dst, samples * 3); @@ -42,8 +42,8 @@ float_to_u8_x3 (const Babl *conversion, static inline void float_to_u8_x2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst, long samples) { float_to_u8_x1 (conversion, src_char, dst, samples * 2); @@ -53,8 +53,8 @@ float_to_u8_x2 (const Babl *conversion, static inline void float_pre_to_u8_pre (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst, long samples) { float *src = (float *)src_char; @@ -86,8 +86,8 @@ float_pre_to_u8_pre (const Babl *conversion, static inline void float_to_u16_x1 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float *src = 
(float *)src_char; @@ -103,24 +103,24 @@ float_to_u16_x1 (const Babl *conversion, } static inline void float_to_u16_x2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float_to_u16_x1 (conversion, src_char, dst_char, samples * 2); } static inline void float_to_u16_x3 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float_to_u16_x1 (conversion, src_char, dst_char, samples * 3); } static inline void float_to_u16_x4 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float_to_u16_x1 (conversion, src_char, dst_char, samples * 4); @@ -128,8 +128,8 @@ float_to_u16_x4 (const Babl *conversion, static inline void float_pre_to_u16_pre (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float *src = (float *)src_char; @@ -154,8 +154,8 @@ float_pre_to_u16_pre (const Babl *conversion, static inline void float_pre_to_u32_pre (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float *src = (float *)src_char; @@ -181,8 +181,8 @@ float_pre_to_u32_pre (const Babl *conversion, static inline void float_to_u32_x1 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float *src = (float *)src_char; @@ -200,24 +200,24 @@ float_to_u32_x1 (const Babl *conversion, } static void float_to_u32_x2 (const Babl *conversion, - unsigned char *src_char, - unsigned char 
*dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float_to_u32_x1 (conversion, src_char, dst_char, samples * 2); } static void float_to_u32_x3 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float_to_u32_x1 (conversion, src_char, dst_char, samples * 3); } static void float_to_u32_x4 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { float_to_u32_x1 (conversion, src_char, dst_char, samples * 4); @@ -226,8 +226,8 @@ float_to_u32_x4 (const Babl *conversion, static inline void u32_to_float (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint32_t *src = (uint32_t *)src_char; @@ -243,8 +243,8 @@ u32_to_float (const Babl *conversion, static void u32_to_float_x4 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u32_to_float (conversion, src_char, dst_char, samples * 4); @@ -252,8 +252,8 @@ u32_to_float_x4 (const Babl *conversion, static void u32_to_float_x3 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u32_to_float (conversion, src_char, dst_char, samples * 3); @@ -262,8 +262,8 @@ u32_to_float_x3 (const Babl *conversion, static void u32_to_float_x2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u32_to_float (conversion, src_char, dst_char, samples * 2); @@ -272,8 +272,8 
@@ u32_to_float_x2 (const Babl *conversion, static inline void u16_to_float (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint16_t *src = (uint16_t *)src_char; @@ -289,8 +289,8 @@ u16_to_float (const Babl *conversion, static void u16_to_float_x4 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u16_to_float (conversion, src_char, dst_char, samples * 4); @@ -298,8 +298,8 @@ u16_to_float_x4 (const Babl *conversion, static void u16_to_float_x3 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u16_to_float (conversion, src_char, dst_char, samples * 3); @@ -308,8 +308,8 @@ u16_to_float_x3 (const Babl *conversion, static void u16_to_float_x2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u16_to_float (conversion, src_char, dst_char, samples * 2); @@ -317,8 +317,8 @@ u16_to_float_x2 (const Babl *conversion, static inline void yau16_rgbaf (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint16_t *src = (uint16_t *)src_char; @@ -338,8 +338,8 @@ yau16_rgbaf (const Babl *conversion, static inline void u8_to_float (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint8_t *src = (uint8_t *)src_char; @@ -355,8 +355,8 @@ u8_to_float (const Babl *conversion, static void u8_to_float_x4 (const Babl *conversion, - unsigned char *src_char, - 
unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u8_to_float (conversion, src_char, dst_char, samples * 4); @@ -364,8 +364,8 @@ u8_to_float_x4 (const Babl *conversion, static void u8_to_float_x3 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u8_to_float (conversion, src_char, dst_char, samples * 3); @@ -374,8 +374,8 @@ u8_to_float_x3 (const Babl *conversion, static void u8_to_float_x2 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { u8_to_float (conversion, src_char, dst_char, samples * 2); @@ -383,8 +383,8 @@ u8_to_float_x2 (const Babl *conversion, static inline void yau8_rgbaf (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint8_t *src = (uint8_t *)src_char; @@ -404,8 +404,8 @@ yau8_rgbaf (const Babl *conversion, static inline void yu8_yau8 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint8_t *src = (uint8_t *)src_char; @@ -423,8 +423,8 @@ yu8_yau8 (const Babl *conversion, static inline void yau8_yu8 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint8_t *src = (uint8_t *)src_char; @@ -442,8 +442,8 @@ yau8_yu8 (const Babl *conversion, static inline void yu16_yau16 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint16_t *src = (uint16_t 
*)src_char; @@ -460,8 +460,8 @@ yu16_yau16 (const Babl *conversion, static inline void yau16_yu16 (const Babl *conversion, - unsigned char *src_char, - unsigned char *dst_char, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, long samples) { uint16_t *src = (uint16_t *)src_char; @@ -476,9 +476,11 @@ yau16_yu16 (const Babl *conversion, } +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); /* float and u8 */ babl_conversion_new (babl_format ("R'G'B'A float"), babl_format ("R'G'B'A u8"), diff --git a/extensions/sse-half.c b/extensions/sse-half.c index cee3975..653d68f 100644 --- a/extensions/sse-half.c +++ b/extensions/sse-half.c @@ -313,7 +313,6 @@ init (void) } #endif /* defined(USE_SSE4_1) && defined(USE_F16C) && defined(ARCH_X86_64) */ - return 0; } diff --git a/extensions/sse2-int8.c b/extensions/sse2-int8.c index 6da1b5b..e337fd1 100644 --- a/extensions/sse2-int8.c +++ b/extensions/sse2-int8.c @@ -328,7 +328,6 @@ init (void) } #endif - return 0; } diff --git a/extensions/sse4-int8.c b/extensions/sse4-int8.c index d505fe5..6c61bde 100644 --- a/extensions/sse4-int8.c +++ b/extensions/sse4-int8.c @@ -222,7 +222,6 @@ init (void) } #endif - return 0; } diff --git a/extensions/two-table.c b/extensions/two-table.c index 8becfee..05c4f64 100644 --- a/extensions/two-table.c +++ b/extensions/two-table.c @@ -249,6 +249,5 @@ init (void) "linear", conv_yafloat_linear_yau8_gamma, NULL); - return 0; } diff --git a/extensions/u16.c b/extensions/u16.c index 87d2907..0fe479f 100644 --- a/extensions/u16.c +++ b/extensions/u16.c @@ -27,8 +27,8 @@ static void conv_rgbu16_rgbau16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned char *__restrict__ dst, long samples) @@ -48,8 +48,8 @@ conv_rgbu16_rgbau16 (const Babl *conversion, static void conv_yu16_yau16 (const Babl *conversion, - unsigned char *src, - unsigned char *dst, + unsigned char *__restrict__ src, + unsigned 
char *__restrict__ dst, long samples) @@ -67,8 +67,11 @@ conv_yu16_yau16 (const Babl *conversion, int init (void); +#include "babl-verify-cpu.inc" int init (void) +{ + BABL_VERIFY_CPU(); { babl_conversion_new ( babl_format ("R'G'B' u16"), @@ -111,5 +114,6 @@ init (void) "linear", conv_yu16_yau16, NULL); +} return 0; } diff --git a/extensions/u32.c b/extensions/u32.c index f9d563d..02964ba 100644 --- a/extensions/u32.c +++ b/extensions/u32.c @@ -190,9 +190,11 @@ conv_yu32_yau32 (const Babl *conversion, int init (void); +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); babl_conversion_new ( babl_format ("R'G'B'A u32"), babl_format ("R'G'B'A u16"), diff --git a/extensions/ycbcr.c b/extensions/ycbcr.c index fabc44d..1e779d7 100644 --- a/extensions/ycbcr.c +++ b/extensions/ycbcr.c @@ -32,9 +32,11 @@ static void formats (void); int init (void); +#include "babl-verify-cpu.inc" int init (void) { + BABL_VERIFY_CPU(); components (); models (); conversions (); diff --git a/meson.build b/meson.build index bfa5dcf..10997d5 100644 --- a/meson.build +++ b/meson.build @@ -95,7 +95,10 @@ elif host_cpu_family == 'ppc64' conf.set10('ARCH_PPC64', true) elif host_cpu_family == 'arm' have_arm = true - config.set10('ARCH_ARM', true) + conf.set10('ARCH_ARM', true) +elif host_cpu_family == 'aarch64' + have_aarch64 = true + conf.set10('ARCH_AARCH64', true) endif @@ -189,10 +192,15 @@ endif if host_cpu_family == 'x86_64' x86_64_v2_flags = cc.get_supported_arguments(['-march=x86-64','-msse2', '-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3']) x86_64_v3_flags = x86_64_v2_flags + cc.get_supported_arguments(['-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2']) + + x86_64_v2_flags += '-DX86_64_V2' + x86_64_v3_flags += '-DX86_64_V3' + elif host_cpu_family == 'arm' - arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon']) + arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon-vfpv4']) + arm_neon_flags += '-DARM_NEON' elif host_cpu_family == 'aarch64' - 
common_c_flags += cc.get_supported_arguments(['-mfpu=neon']) + common_c_flags += cc.get_supported_arguments(['-mfpu=neon-fp-armv8']) endif ################################################################################