From: Ell Date: Sun, 21 Oct 2018 09:18:36 +0000 (-0400) Subject: cairo: accelerate R'G'B'A u8 -> cairo-ARGB32 conversion X-Git-Tag: archive/raspbian/1%0.1.106-3+rpi1^2~15^2~13^2~7 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=6d890333f6337fe044cce3eed3e474e43cd7afc9;p=babl.git cairo: accelerate R'G'B'A u8 -> cairo-ARGB32 conversion Accelerate the R'G'B'A u8 -> cairo-ARGB32 conversion by processing the entire RGBA tuple in parallel (or, on 32-bit machines, in two parts), instead of component-by-component, using wide integer arithmetic/logic. This speeds the conversion up by ~25%. --- diff --git a/extensions/cairo.c b/extensions/cairo.c index 059b07d..9a96fc3 100644 --- a/extensions/cairo.c +++ b/extensions/cairo.c @@ -187,12 +187,34 @@ conv_rgba8_cairo32_le (const Babl *conversion,unsigned char *src, unsigned char uint32_t *dsti = (void*) dst; while (n--) { - unsigned char alpha = src[3]; -#define div_255(a) ((((a)+128)+(((a)+128)>>8))>>8) - *dsti++ = (alpha << 24) + - (div_255 (src[0] * alpha) << 16) + - (div_255 (src[1] * alpha) << 8) + - (div_255 (src[2] * alpha)); + unsigned char alpha = src[3]; +#if SIZE_MAX >= UINT64_MAX /* 64-bit */ + uint64_t rbag = ((uint64_t) src[0] << 48) | + ((uint64_t) src[2] << 32) | + ((uint64_t) 255 << 16) | + ((uint64_t) src[1] << 0); + rbag *= alpha; + rbag += 0x0080008000800080; + rbag += (rbag >> 8) & 0x00ff00ff00ff00ff; + rbag &= 0xff00ff00ff00ff00; + *dsti++ = (uint32_t) (rbag >> 0) | + (uint32_t) (rbag >> 40); +#else /* 32-bit */ + uint32_t rb = ((uint32_t) src[0] << 16) | + ((uint32_t) src[2] << 0); + uint64_t ag = ((uint32_t) 255 << 16) | + ((uint32_t) src[1] << 0); + rb *= alpha; + ag *= alpha; + rb += 0x00800080; + ag += 0x00800080; + rb += (rb >> 8) & 0x00ff00ff; + ag += (ag >> 8) & 0x00ff00ff; + rb &= 0xff00ff00; + ag &= 0xff00ff00; + *dsti++ = (uint32_t) (ag >> 0) | + (uint32_t) (rb >> 8); +#endif src+=4; } } @@ -223,6 +245,8 @@ conv_yA8_cairo32_le (const Babl *conversion,unsigned char *src, unsigned char *d long n = samples; while (n--) { +#define div_255(a) ((((a)+128)+(((a)+128)>>8))>>8) + unsigned char gray = *src++; unsigned char alpha = *src++; unsigned char val = div_255 (gray * alpha);