diff options

| Field | Value | Date / link |
|---|---|---|
| author | Michael Jennings <mej@kainx.org> | 2006-01-04 09:15:17 +0000 |
| committer | Michael Jennings <mej@kainx.org> | 2006-01-04 09:15:17 +0000 |
| commit | 9c737f4aa10c9b04cf06de5811d4814e347eb249 | (patch) |
| tree | f47a09a6d4c3602ee892d5929c37e41cb6418de3 | |
| parent | e39e3f1ce9ecde39c96aa67113130d23c002dca3 | (diff) |
| download | eterm-9c737f4aa10c9b04cf06de5811d4814e347eb249.tar.gz | |

Wed Jan 4 04:14:14 2006 Michael Jennings (mej)
Applied patches from Tres Melton <tres@mindspring.com> for SSE2
alignment and other shading-related issues.
----------------------------------------------------------------------
SVN revision: 19521

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | ChangeLog | 5 |
| -rw-r--r-- | src/pixmap.c | 8 |
| -rw-r--r-- | src/sse2_cmod.c | 64 |

3 files changed, 35 insertions, 42 deletions

@@ -5449,3 +5449,8 @@ Wed Jan 4 03:44:04 2006 Michael Jennings (mej) X resource tracking/debugging support. ---------------------------------------------------------------------- +Wed Jan 4 04:14:14 2006 Michael Jennings (mej) + +Applied patches from Tres Melton <tres@mindspring.com> for SSE2 +alignment and other shading-related issues. +---------------------------------------------------------------------- diff --git a/src/pixmap.c b/src/pixmap.c index 9c97d25..01116d4 100644 --- a/src/pixmap.c +++ b/src/pixmap.c @@ -1748,14 +1748,7 @@ shade_ximage_24(void *data, int bpl, int w, int h, int rm, int gm, int bm) void colormod_trans(Pixmap p, imlib_t *iml, GC gc, unsigned short w, unsigned short h) { - -#ifdef HAVE_SSE2 - XImage * __attribute__ ((aligned (16))) ximg; -#elif defined HAVE_MMX - XImage * __attribute__ ((aligned (8))) ximg; -#else XImage *ximg; -#endif register unsigned long i; #if 0 @@ -1887,6 +1880,7 @@ colormod_trans(Pixmap p, imlib_t *iml, GC gc, unsigned short w, unsigned short h if (ximg->bits_per_pixel != 32) { D_PIXMAP(("Rendering 24 bit\n")); shade_ximage_24(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); + break; } /* drop */ case 32: diff --git a/src/sse2_cmod.c b/src/sse2_cmod.c index 612f9eb..c61e3fd 100644 --- a/src/sse2_cmod.c +++ b/src/sse2_cmod.c @@ -1,12 +1,17 @@ -/* File: sse2_cmod.c - * Written and Copyright (C) 2005 by Tres Melton +/* + * Copyright (C) 1997-2006, Michael Jennings * - * Permission is hereby granted to Michael Jennings to license this code as - * he sees fit. I'd prefer the GPL but he will choose the BSD. The debate - * is moot as this is to become a part of the Eterm project, for which he is - * the primary author. For users of this code I ask that any modifications - * be released back into the community but with Michael Jennings chooses the - * BSD license then that request has no backing in law. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies of the Software, its documentation and marketing & publicity + * materials, and acknowledgment shall be given in the documentation, materials + * and software packages that this Software was used. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -15,8 +20,9 @@ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * Ported from <willem@stack.nl> Willem Monsuwe's original x86/MMX assembly + * code by Tres Melton in 2005 and 2006. Anything copyrightable by me is + * assigned to the Eterm project and its founder/maintainer: Michael Jennings. * * Much inspiration was drawn from the original x86 MMX port written by * Willem Monsuwe <willem@stack.nl> in pure x86/MMX Assembly. The MMX @@ -25,6 +31,8 @@ * ensure they worked with various gcc options. Further the code was * extended to take advantage of the 128 bit xmm registers in SSE2. * + * The imlib2 code in Enlightenment also has a lot to teach on the subject. 
+ * * Manuals used in this port: * The Gnu Assembler * http://www.gnu.org/software/binutils/manual/gas-2.9.1/html_mono/as.html @@ -65,21 +73,7 @@ * assembly code in C functions is the only way to ensure that this code will continue to * function through a (however unlikely) change. If pure assembly were to be used as the * original MMX author, Willem Monsuwe, did and the ABI changed then this code would cease - * to function properly. After examination of Willem's code I'm wondering if he - * wrote it for GNU/Linux originally. The ENTER and LEAVE macros put all of the - * parameters on the stack so that they can be accessed by references to the Base_Pointer - * the way that ANSI C is defined. If he originally wrote this for GNU/Linux then he most - * likely would have just used the registers instead of unwinding those optimizations - * manually by pushing them to the stack. And if he explicitly wanted to use the stack - * then there are parameters to gcc that would have performed those operations for him: - * -mregparm/-mmemparm. Other gcc options that can tweak with the stack and the number - * of registers available for function parameters are: -fcall-used/-fcall-saved, - * -fcaller-saves, -fstack-protector, -fPIC/-fpic, -mno-push-args, etc.. It might be - * advisable to check for these switches when using the original MMX code and emit a - * warning if any are enabled. I know that the PIC option trashes the BX register and - * that both Willem and I use that register. In other words If you do manage to get it - * to compile & run w/ -fpic it WILL break. On the plus side, you can keep the pieces! :-) - * On the other hand I could be wrong about everything + * to function properly. 
* * In Conclusion: * Using C functions and inline assembly code should alleviate all of the concerns as the @@ -126,8 +120,8 @@ void shade_ximage_15_sse2( volatile void *data, volatile int bpl, volatile int w "jns 3f \n\t" "2: \n\t" /* Start of the inner loop (pixels 8 at a time --> 8 * 16 = 128bits/xmm register ) */ "movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t" /* Load the 16 bits of the pixel (5 bits for red, 6 bits for green, 5 bits for blue) */ - "movdqu %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */ - "movdqu %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */ + "movdqa %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */ + "movdqa %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */ "psrlw $5, %%xmm1 \n\t" /* Packed Shift Right Logical Words */ /* From A64_128bit_Media_Programming (p. 347) */ /* Shifts the blue off of the green color */ @@ -191,8 +185,8 @@ void shade_ximage_15_sse2( volatile void *data, volatile int bpl, volatile int w "jns 8f \n\t" "7: \n\t" "movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t" - "movdqu %%xmm0, %%xmm1 \n\t" - "movdqu %%xmm0, %%xmm2 \n\t" + "movdqa %%xmm0, %%xmm1 \n\t" + "movdqa %%xmm0, %%xmm2 \n\t" "psrlw $5, %%xmm1 \n\t" "psrlw $10, %%xmm0 \n\t" "psllw $11, %%xmm2 \n\t" @@ -301,8 +295,8 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w "jns 3f \n\t" "2: \n\t" /* Start of the inner loop (pixels 8 at a time --> 8 * 16 = 128bits/xmm register ) */ "movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t" /* Load the 16 bits of the pixel (5 bits for red, 6 bits for green, 5 bits for blue) */ - "movdqu %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */ - "movdqu %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */ + "movdqa %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */ + "movdqa %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */ "psrlw $5, %%xmm1 \n\t" /* 
Packed Shift Right Logical Words */ /* From A64_128bit_Media_Programming (p. 347) */ /* Shifts the blue off of the green color */ @@ -359,7 +353,7 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w "pcmpeqw %%xmm3, %%xmm3 \n\t" /* Packed Compare Equal Words */ /* From A64_128bit_Media_Programming (p. 276) */ /* This sets xmm3 to 128 1's (since mm6 = mm6) */ - "movdqu %%xmm3, %%xmm4 \n\t" /* Make copy of 128 ones */ + "movdqa %%xmm3, %%xmm4 \n\t" /* Make copy of 128 ones */ "psllw $5, %%xmm3 \n\t" /* xmm3 = 8 copies of 1111 1111 1110 0000 */ "psllw $6, %%xmm4 \n\t" /* xmm4 = 8 copies of 1111 1111 1100 0000 */ "6: \n\t" @@ -368,8 +362,8 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w "jns 8f \n\t" "7: \n\t" "movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t" - "movdqu %%xmm0, %%xmm1 \n\t" - "movdqu %%xmm0, %%xmm2 \n\t" + "movdqa %%xmm0, %%xmm1 \n\t" + "movdqa %%xmm0, %%xmm2 \n\t" "psrlw $5, %%xmm1 \n\t" "psrlw $11, %%xmm0 \n\t" "psllw $11, %%xmm2 \n\t" @@ -480,7 +474,7 @@ void shade_ximage_32_sse2( volatile void *data, volatile int bpl, volatile int w "psllw $15, %%xmm6 \n\t" /* Packed Shift Left Logical Words */ /* From A64_128bit_Media_Programming (p. 330) */ /* This sets 8 16 bit values of 1000 0000 0000 0000 in the 128 bit word */ - "movdqu %%xmm6, %%xmm5 \n\t" /* Copy mm6 to mm5 (we need mm6 later) */ + "movdqa %%xmm6, %%xmm5 \n\t" /* Copy mm6 to mm5 (we need mm6 later) */ "pmulhw %%xmm4, %%xmm5 \n\t" /* Packed Multiply High Signed Word */ /* mm4 = ( mm4 * mm5 ) >> 16 (8 times, once for each 16bit value) */ /* For each color_ modifier (cm) */ |