diff options
author | Owen Taylor <otaylor@redhat.com> | 2002-01-04 00:34:06 +0000 |
---|---|---|
committer | Owen Taylor <otaylor@src.gnome.org> | 2002-01-04 00:34:06 +0000 |
commit | c96a394cc2e58f710237ceb497e9b8bc55e5327c (patch) | |
tree | c32dbc7e2ab89b0603015bc7a3b73fa1cf9ace46 /gdk-pixbuf | |
parent | 317ba5b26725dcd23351c70910ac0010ead1a415 (diff) | |
download | gdk-pixbuf-c96a394cc2e58f710237ceb497e9b8bc55e5327c.tar.gz |
Fix and uncomment non-MMX version.
Thu Jan 3 19:31:58 2002 Owen Taylor <otaylor@redhat.com>
* pixops/pixops.c (scale_line_22_33): Fix and uncomment
non-MMX version.
* pixops/pixops.c (pixops_composite_nearest): Remove a
division.
* pixops/pixops.c (pixops_composite): Add some docs
about the parameters.
* pixops/README: Add notes about the correct algorithms
for alpha compositing and how to implement them quickly.
Diffstat (limited to 'gdk-pixbuf')
-rw-r--r-- | gdk-pixbuf/ChangeLog | 14 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/README | 61 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/pixops.c | 66 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/scale_line_22_33_mmx.S | 26 |
4 files changed, 143 insertions, 24 deletions
diff --git a/gdk-pixbuf/ChangeLog b/gdk-pixbuf/ChangeLog index 9a8e370a3..390efd9fd 100644 --- a/gdk-pixbuf/ChangeLog +++ b/gdk-pixbuf/ChangeLog @@ -1,3 +1,17 @@ +Thu Jan 3 19:31:58 2002 Owen Taylor <otaylor@redhat.com> + + * pixops/pixops.c (scale_line_22_33): Fix and uncomment + non-MMX version. + + * pixops/pixops.c (pixops_composite_nearest): Remove a + division. + + * pixops/pixops.c (pixops_composite): Add some docs + about the parameters. + + * pixops/README: Add notes about the correct algorithms + for alpha compositing and how to implement them quickly. + 2001-12-29 Tor Lillqvist <tml@iki.fi> * Makefile.am (EXTRA_DIST): Add makefile.msc. diff --git a/gdk-pixbuf/pixops/README b/gdk-pixbuf/pixops/README index 957a0b3cb..c79e0e2fd 100644 --- a/gdk-pixbuf/pixops/README +++ b/gdk-pixbuf/pixops/README @@ -33,6 +33,61 @@ for a number of the most common special cases: compositing from RGBA to RGBx compositing against a color from RGBA and storing in a RGBx buffer +Alpha compositing 8 bit RGBAa onto RGB is defined in terms of +rounding the exact result (real values in [0,1]): + + cc = ca * aa + (1 - aa) * cb + + Cc = ROUND [255. * (Ca/255. * Aa/255. + (1 - Aa/255.) * Cb/255.)] + +We can compute this exactly with integer arithmetic: + +ROUND(i / 255.) can be computed exactly for i in [0,255*255] as: + + t = i + 0x80; result = (t + (t >> 8)) >> 8; [ call this To8(i) ] + +So, + + t = Ca * Aa + (255 - Aa) * Cb + 0x80; + Cc = (t + (t >> 8)) >> 8; + +Alpha compositing 8 bit RaGaBaAa onto RbGbBbAb is a little harder, for +non-premultiplied alpha. 
The premultiplied result is simple: + + ac = aa + (1 - aa) * ab + cc = ca + (1 - aa) * cb + +Which can be computed in integer terms as: + + Cc = Ca + To8 ((255 - Aa) * Cb) + Ac = Aa + To8 ((255 - Aa) * Ab) + +For non-premultiplied alpha, we need to divide the color components by +the alpha: + + +- (ca * aa + (1 - aa) * ab * cb) / ac; aa != 0 + cc = | + +- cb; aa == 0 + +To calculate this as an integer, we note the alternate form: + + cc = cb + aa * (ca - cb) / ac + +[ 'cc = ca + (ac - aa) * (cb - ca) / ac' can also be useful numerically, + but isn't important here ] + +We can express this in integers as: + + Ac_tmp = Aa * 255 + (255 - Aa) * Ab; + + +- Cb + (255 * Aa * (Ca - Cb) + Ac_tmp / 2) / Ac_tmp ; Ca > Cb + Cc = | + +- Cb - (255 * Aa * (Cb - Ca) + Ac_tmp / 2) / Ac_tmp ; Ca <= Cb + +Or, playing bit tricks to avoid the conditional + + Cc = Cb + (255 * Aa * (Ca - Cb) + (((Ca - Cb) >> 8) ^ (Ac_tmp / 2)) ) / Ac_tmp + TODO ==== @@ -57,13 +112,13 @@ TODO the _nearest() variants do it right, most of the other code is wrong to some degree or another. 
- For instance, in composite line, we have: + For instance, in composite_line_22_4a4(), we have: dest[0] = ((0xff0000 - a) * dest[0] + r) >> 24; - if a is 0, then we have: + if a is 0 (implies r == 0), then we have: - (0xff0000 * dest[0] + r) >> 24 + (0xff0000 * dest[0]) >> 24 which gives results which are 1 to low: diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c index c9606107f..ef5afd747 100644 --- a/gdk-pixbuf/pixops/pixops.c +++ b/gdk-pixbuf/pixops/pixops.c @@ -202,10 +202,14 @@ pixops_composite_nearest (guchar *dest_buf, else { unsigned int a1 = 0xff - a0; - - dest[0] = (a0 * p[0] + a1 * dest[0]) / 0xff; - dest[1] = (a0 * p[1] + a1 * dest[1]) / 0xff; - dest[2] = (a0 * p[2] + a1 * dest[2]) / 0xff; + unsigned int tmp; + + tmp = a0 * p[0] + a1 * dest[0] + 0x80; + dest[0] = (tmp + (tmp >> 8)) >> 8; + tmp = a0 * p[1] + a1 * dest[1] + 0x80; + dest[1] = (tmp + (tmp >> 8)) >> 8; + tmp = a0 * p[2] + a1 * dest[2] + 0x80; + dest[2] = (tmp + (tmp >> 8)) >> 8; } break; } @@ -388,7 +392,7 @@ composite_line (int *weights, int n_x, int n_y, int *pixel_weights; pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y; - + for (i=0; i<n_y; i++) { guchar *q = src[i] + x_scaled * src_channels; @@ -837,10 +841,9 @@ scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y, } #endif /* USE_MMX */ -#ifdef SCALE_LINE_22_33_USED /* This dead code would need changes if we wanted to use it */ static guchar * scale_line_22_33 (int *weights, int n_x, int n_y, - guchar *dest, guchar *dest_end, int dest_channels, int dest_has_alpha, + guchar *dest, int dest_x, guchar *dest_end, int dest_channels, int dest_has_alpha, guchar **src, int src_channels, gboolean src_has_alpha, int x_init, int x_step, int src_width, int check_size, guint32 color1, guint32 color2) @@ -860,8 +863,8 @@ scale_line_22_33 (int *weights, int n_x, int n_y, q0 = src0 + x_scaled * 3; q1 = src1 + x_scaled * 3; - pixel_weights = (int *)((char *)weights + ((x >> 
(SCALE_SHIFT - SUBSAMPLE_BITS - 4)) & (SUBSAMPLE_MASK << 4))); - + pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * 4; + w1 = pixel_weights[0]; w2 = pixel_weights[1]; w3 = pixel_weights[2]; @@ -883,9 +886,9 @@ scale_line_22_33 (int *weights, int n_x, int n_y, g += w4 * q1[5]; b += w4 * q1[6]; - dest[0] = r >> 16; - dest[1] = g >> 16; - dest[2] = b >> 16; + dest[0] = (r + 0x8000) >> 16; + dest[1] = (g + 0x8000) >> 16; + dest[2] = (b + 0x8000) >> 16; dest += 3; @@ -894,7 +897,6 @@ scale_line_22_33 (int *weights, int n_x, int n_y, return dest; } -#endif /* SCALE_LINE_22_33_USED */ static void process_pixel (int *weights, int n_x, int n_y, @@ -1227,7 +1229,7 @@ bilinear_make_fast_weights (PixopsFilter *filter, double x_scale, double y_scale for (i = 0; i < n_y; i++) for (j = 0; j < n_x; j++) - *(pixel_weights + n_x * i + j) = 65536 * x_weights[j] * x_scale * y_weights[i] * y_scale * overall_alpha; + *(pixel_weights + n_x * i + j) = 65536 * x_weights[j] * x_scale * y_weights[i] * y_scale * overall_alpha + 0.5; } g_free (x_weights); @@ -1412,6 +1414,30 @@ pixops_composite_color (guchar *dest_buf, g_free (filter.weights); } +/** + * pixops_composite: + * @dest_buf: pointer to location to store result + * @render_x0: x0 of region of scaled source to store into @dest_buf + * @render_y0: y0 of region of scaled source to store into @dest_buf + * @render_x1: x1 of region of scaled source to store into @dest_buf + * @render_y1: y1 of region of scaled source to store into @dest_buf + * @dest_rowstride: rowstride of @dest_buf + * @dest_channels: number of channels in @dest_buf + * @dest_has_alpha: whether @dest_buf has alpha + * @src_buf: pointer to source pixels + * @src_width: width of source (used for clipping) + * @src_height: height of source (used for clipping) + * @src_rowstride: rowstride of source + * @src_channels: number of channels in @src_buf + * @src_has_alpha: whether @src_buf has alpha + * @scale_x: amount to scale source by in 
X direction + * @scale_y: amount to scale source by in Y direction + * @interp_type: type of interpolation + * @overall_alpha: overall alpha factor to multiply source by + * + * Scale source buffer by scale_x / scale_y, then composite a given rectangle + * of the result into the destination buffer. + **/ void pixops_composite (guchar *dest_buf, int render_x0, @@ -1550,12 +1576,16 @@ pixops_scale (guchar *dest_buf, break; } + if (filter.n_x == 2 && filter.n_y == 2 && dest_channels == 3 && src_channels == 3) + { #ifdef USE_MMX - if (filter.n_x == 2 && filter.n_y == 2 && - found_mmx && dest_channels == 3 && src_channels == 3) - line_func = scale_line_22_33_mmx_stub; + if (found_mmx) + line_func = scale_line_22_33_mmx_stub; + else +#endif + line_func = scale_line_22_33; + } else -#endif line_func = scale_line; pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1, diff --git a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S index 40dec48d3..8259a9db3 100644 --- a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S +++ b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S @@ -55,6 +55,12 @@ _pixops_scale_line_22_33_mmx: cmpl %esi,28(%ebp) je .out +/* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining + * points we are interpolating between, as: + * + * 000000BB00GG00RR + */ + /* Load initial values into %mm1, %mm3 */ leal (%edx,%edx,2),%edx # Multiply by 3 @@ -82,11 +88,16 @@ _pixops_scale_line_22_33_mmx: jmp .newx .p2align 4,,7 .loop: -/* int x_index = (x & 0xf000) >> 12 */ +/* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y + * 16 4 0xf 2 2 + */ movl %ebx,%eax andl $0xf000,%eax shrl $7,%eax +/* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and + * accumulate. 
+ */ movq (%edi,%eax),%mm4 pmullw %mm0,%mm4 movq 8(%edi,%eax),%mm5 @@ -99,7 +110,17 @@ _pixops_scale_line_22_33_mmx: paddw %mm6, %mm7 paddw %mm5, %mm7 +/* %mm7 holds the accumulated sum. Compute (C + 0x80) / 256 + */ + pxor %mm4, %mm4 + movl $8421504, %eax # 0x00808080 + movd %eax, %mm6 + punpcklbw %mm4, %mm6 + paddw %mm6, %mm7 psrlw $8, %mm7 + +/* Pack into %eax and store result + */ packuswb %mm7, %mm7 movd %mm7, %eax @@ -113,7 +134,7 @@ _pixops_scale_line_22_33_mmx: /* x += x_step; */ addl 24(%ebp),%ebx -/* x_scale = x >> 16; */ +/* x_scaled = x >> 16; */ movl %ebx,%edx sarl $16,%edx @@ -131,7 +152,6 @@ _pixops_scale_line_22_33_mmx: leal (%edx,%edx,2),%edx # Multiply by 3 movl 16(%ebp),%edi - pxor %mm4, %mm4 movzbl 2(%edi,%edx),%ecx shll $16,%ecx movzwl (%edi,%edx),%eax |