summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Clasen <mclasen@redhat.com>2022-03-10 19:26:25 +0000
committerMatthias Clasen <mclasen@redhat.com>2022-03-10 19:26:25 +0000
commit9f7c9ce6ad9d0ad8743fe9c5d5a66b7ae477a5c8 (patch)
tree3f55b203fb125ab8ee87c2c6d664dd8451c9cf04
parent1f155bf39af56cb5a7d6ede72983502a9286cc9c (diff)
parent588076d16676b7231d6c61ab89a8fbc7be9a782d (diff)
downloadgtk+-9f7c9ce6ad9d0ad8743fe9c5d5a66b7ae477a5c8.tar.gz
Merge branch 'better-format-conversions' into 'main'
gdk: Clean up the optimised premultiply conversion function. See merge request GNOME/gtk!4550
-rw-r--r--gdk/gdkmemoryformat.c155
1 files changed, 84 insertions, 71 deletions
diff --git a/gdk/gdkmemoryformat.c b/gdk/gdkmemoryformat.c
index 64c027eac5..5bd7b228d4 100644
--- a/gdk/gdkmemoryformat.c
+++ b/gdk/gdkmemoryformat.c
@@ -25,10 +25,6 @@
#include <epoxy/gl.h>
-#ifdef __ARM_NEON
-#include <arm_neon.h>
-#endif
-
typedef struct _GdkMemoryFormatDescription GdkMemoryFormatDescription;
#define TYPED_FUNCS(name, T, R, G, B, A, bpp, scale) \
@@ -170,75 +166,54 @@ r32g32b32a32_float_from_float (guchar *dest,
memcpy (dest, src, sizeof (float) * n * 4);
}
-// This one conversion is quite important, it converts from RGBA with straight
-// alpha (as found in PNG for instance) to BGRA with premultiplied alpha (the
-// sole cairo format available).
-static void
-r8g8b8a8_to_b8g8r8a8_premultiplied (guchar *dest,
- const guchar *src,
- gsize n)
-{
-#ifdef __ARM_NEON
- uint16x8_t one = vdupq_n_u16 (1);
- uint16x8_t half = vdupq_n_u16 (127);
-
- for (gsize i = n / 8; i > 0; i--)
- {
- // Work on “just” 8 pixels at once, since we need the full 16-bytes of
- // the q registers for the multiplication.
- uint8x8x4_t rgba = vld4_u8 (src);
- uint8x8_t r8 = rgba.val[0];
- uint8x8_t g8 = rgba.val[1];
- uint8x8_t b8 = rgba.val[2];
- uint8x8_t a8 = rgba.val[3];
-
- // This is the same algorithm as premultiply(), but on packed 16-bit
- // instead of float.
- uint16x8_t r16 = vmull_u8 (r8, a8);
- uint16x8_t g16 = vmull_u8 (g8, a8);
- uint16x8_t b16 = vmull_u8 (b8, a8);
-
- r16 = vaddq_u16 (r16, half);
- g16 = vaddq_u16 (g16, half);
- b16 = vaddq_u16 (b16, half);
-
- r16 = vsraq_n_u16 (r16, r16, 8);
- g16 = vsraq_n_u16 (g16, g16, 8);
- b16 = vsraq_n_u16 (b16, b16, 8);
-
- r16 = vaddq_u16 (r16, one);
- g16 = vaddq_u16 (g16, one);
- b16 = vaddq_u16 (b16, one);
-
- // Just like the other one, here we use BGRA instead of RGBA!
- rgba.val[0] = vshrn_n_u16 (b16, 8);
- rgba.val[1] = vshrn_n_u16 (g16, 8);
- rgba.val[2] = vshrn_n_u16 (r16, 8);
-
- vst4_u8 (dest, rgba);
- src += 32;
- dest += 32;
- }
+#define PREMULTIPLY_FUNC(name, R1, G1, B1, A1, R2, G2, B2, A2) \
+static void \
+name (guchar *dest, \
+ const guchar *src, \
+ gsize n) \
+{ \
+ for (; n > 0; n--) \
+ { \
+ guchar a = src[A1]; \
+ guint16 r = (guint16)src[R1] * a + 127; \
+ guint16 g = (guint16)src[G1] * a + 127; \
+ guint16 b = (guint16)src[B1] * a + 127; \
+ dest[R2] = (r + (r >> 8) + 1) >> 8; \
+ dest[G2] = (g + (g >> 8) + 1) >> 8; \
+ dest[B2] = (b + (b >> 8) + 1) >> 8; \
+ dest[A2] = a; \
+ dest += 4; \
+ src += 4; \
+ } \
+}
- // We want the fallthrough here for the last (up to) seven bytes of the row.
- n = n % 8;
-#endif // __ARM_NEON
+PREMULTIPLY_FUNC(r8g8b8a8_to_r8g8b8a8_premultiplied, 0, 1, 2, 3, 0, 1, 2, 3)
+PREMULTIPLY_FUNC(r8g8b8a8_to_b8g8r8a8_premultiplied, 0, 1, 2, 3, 2, 1, 0, 3)
+PREMULTIPLY_FUNC(r8g8b8a8_to_a8r8g8b8_premultiplied, 0, 1, 2, 3, 1, 2, 3, 0)
+PREMULTIPLY_FUNC(r8g8b8a8_to_a8b8g8r8_premultiplied, 0, 1, 2, 3, 3, 2, 1, 0)
- for (; n > 0; n--)
- {
- guchar a = src[3];
- guint16 r = (guint16)src[0] * a + 127;
- guint16 g = (guint16)src[1] * a + 127;
- guint16 b = (guint16)src[2] * a + 127;
- dest[0] = (b + (b >> 8) + 1) >> 8;
- dest[1] = (g + (g >> 8) + 1) >> 8;
- dest[2] = (r + (r >> 8) + 1) >> 8;
- dest[3] = a;
- dest += 4;
- src += 4;
- }
+#define ADD_ALPHA_FUNC(name, R1, G1, B1, R2, G2, B2, A2) \
+static void \
+name (guchar *dest, \
+ const guchar *src, \
+ gsize n) \
+{ \
+ for (; n > 0; n--) \
+ { \
+ dest[R2] = src[R1]; \
+ dest[G2] = src[G1]; \
+ dest[B2] = src[B1]; \
+ dest[A2] = 255; \
+ dest += 4; \
+ src += 3; \
+ } \
}
+ADD_ALPHA_FUNC(r8g8b8_to_r8g8b8a8, 0, 1, 2, 0, 1, 2, 3)
+ADD_ALPHA_FUNC(r8g8b8_to_b8g8r8a8, 0, 1, 2, 2, 1, 0, 3)
+ADD_ALPHA_FUNC(r8g8b8_to_a8r8g8b8, 0, 1, 2, 1, 2, 3, 0)
+ADD_ALPHA_FUNC(r8g8b8_to_a8b8g8r8, 0, 1, 2, 3, 2, 1, 0)
+
struct _GdkMemoryFormatDescription
{
GdkMemoryAlpha alpha;
@@ -548,15 +523,53 @@ gdk_memory_convert (guchar *dest_data,
const GdkMemoryFormatDescription *src_desc = &memory_formats[src_format];
float *tmp;
gsize y;
+ void (*func) (guchar *, const guchar *, gsize) = NULL;
g_assert (dest_format < GDK_MEMORY_N_FORMATS);
g_assert (src_format < GDK_MEMORY_N_FORMATS);
- if (src_format == GDK_MEMORY_R8G8B8A8 && dest_format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED)
+ if (src_format == GDK_MEMORY_R8G8B8A8 && dest_format == GDK_MEMORY_R8G8B8A8_PREMULTIPLIED)
+ func = r8g8b8a8_to_r8g8b8a8_premultiplied;
+ else if (src_format == GDK_MEMORY_B8G8R8A8 && dest_format == GDK_MEMORY_R8G8B8A8_PREMULTIPLIED)
+ func = r8g8b8a8_to_b8g8r8a8_premultiplied;
+ else if (src_format == GDK_MEMORY_R8G8B8A8 && dest_format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED)
+ func = r8g8b8a8_to_b8g8r8a8_premultiplied;
+ else if (src_format == GDK_MEMORY_B8G8R8A8 && dest_format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED)
+ func = r8g8b8a8_to_r8g8b8a8_premultiplied;
+ else if (src_format == GDK_MEMORY_R8G8B8A8 && dest_format == GDK_MEMORY_A8R8G8B8_PREMULTIPLIED)
+ func = r8g8b8a8_to_a8r8g8b8_premultiplied;
+ else if (src_format == GDK_MEMORY_B8G8R8A8 && dest_format == GDK_MEMORY_A8R8G8B8_PREMULTIPLIED)
+ func = r8g8b8a8_to_a8b8g8r8_premultiplied;
+ else if (src_format == GDK_MEMORY_R8G8B8 && dest_format == GDK_MEMORY_R8G8B8A8_PREMULTIPLIED)
+ func = r8g8b8_to_r8g8b8a8;
+ else if (src_format == GDK_MEMORY_B8G8R8 && dest_format == GDK_MEMORY_R8G8B8A8_PREMULTIPLIED)
+ func = r8g8b8_to_b8g8r8a8;
+ else if (src_format == GDK_MEMORY_R8G8B8 && dest_format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED)
+ func = r8g8b8_to_b8g8r8a8;
+ else if (src_format == GDK_MEMORY_B8G8R8 && dest_format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED)
+ func = r8g8b8_to_r8g8b8a8;
+ else if (src_format == GDK_MEMORY_R8G8B8 && dest_format == GDK_MEMORY_A8R8G8B8_PREMULTIPLIED)
+ func = r8g8b8_to_a8r8g8b8;
+ else if (src_format == GDK_MEMORY_B8G8R8 && dest_format == GDK_MEMORY_A8R8G8B8_PREMULTIPLIED)
+ func = r8g8b8_to_a8b8g8r8;
+ else if (src_format == GDK_MEMORY_R8G8B8 && dest_format == GDK_MEMORY_R8G8B8A8)
+ func = r8g8b8_to_r8g8b8a8;
+ else if (src_format == GDK_MEMORY_B8G8R8 && dest_format == GDK_MEMORY_R8G8B8A8)
+ func = r8g8b8_to_b8g8r8a8;
+ else if (src_format == GDK_MEMORY_R8G8B8 && dest_format == GDK_MEMORY_B8G8R8A8)
+ func = r8g8b8_to_b8g8r8a8;
+ else if (src_format == GDK_MEMORY_B8G8R8 && dest_format == GDK_MEMORY_B8G8R8A8)
+ func = r8g8b8_to_r8g8b8a8;
+ else if (src_format == GDK_MEMORY_R8G8B8 && dest_format == GDK_MEMORY_A8R8G8B8)
+ func = r8g8b8_to_a8r8g8b8;
+ else if (src_format == GDK_MEMORY_B8G8R8 && dest_format == GDK_MEMORY_A8R8G8B8)
+ func = r8g8b8_to_a8b8g8r8;
+
+ if (func != NULL)
{
for (y = 0; y < height; y++)
{
- r8g8b8a8_to_b8g8r8a8_premultiplied (dest_data, src_data, width);
+ func (dest_data, src_data, width);
src_data += src_stride;
dest_data += dest_stride;
}