summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Jennings <mej@kainx.org> 2006-01-04 09:15:17 +0000
committer: Michael Jennings <mej@kainx.org> 2006-01-04 09:15:17 +0000
commit: 9c737f4aa10c9b04cf06de5811d4814e347eb249 (patch)
tree: f47a09a6d4c3602ee892d5929c37e41cb6418de3
parent: e39e3f1ce9ecde39c96aa67113130d23c002dca3 (diff)
download: eterm-9c737f4aa10c9b04cf06de5811d4814e347eb249.tar.gz
Wed Jan 4 04:14:14 2006 Michael Jennings (mej)
Applied patches from Tres Melton <tres@mindspring.com> for SSE2 alignment and other shading-related issues. ---------------------------------------------------------------------- SVN revision: 19521
-rw-r--r--  ChangeLog        5
-rw-r--r--  src/pixmap.c     8
-rw-r--r--  src/sse2_cmod.c  64
3 files changed, 35 insertions, 42 deletions
diff --git a/ChangeLog b/ChangeLog
index 625d72a..9864704 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -5449,3 +5449,8 @@ Wed Jan 4 03:44:04 2006 Michael Jennings (mej)
X resource tracking/debugging support.
----------------------------------------------------------------------
+Wed Jan 4 04:14:14 2006 Michael Jennings (mej)
+
+Applied patches from Tres Melton <tres@mindspring.com> for SSE2
+alignment and other shading-related issues.
+----------------------------------------------------------------------
diff --git a/src/pixmap.c b/src/pixmap.c
index 9c97d25..01116d4 100644
--- a/src/pixmap.c
+++ b/src/pixmap.c
@@ -1748,14 +1748,7 @@ shade_ximage_24(void *data, int bpl, int w, int h, int rm, int gm, int bm)
void
colormod_trans(Pixmap p, imlib_t *iml, GC gc, unsigned short w, unsigned short h)
{
-
-#ifdef HAVE_SSE2
- XImage * __attribute__ ((aligned (16))) ximg;
-#elif defined HAVE_MMX
- XImage * __attribute__ ((aligned (8))) ximg;
-#else
XImage *ximg;
-#endif
register unsigned long i;
#if 0
@@ -1887,6 +1880,7 @@ colormod_trans(Pixmap p, imlib_t *iml, GC gc, unsigned short w, unsigned short h
if (ximg->bits_per_pixel != 32) {
D_PIXMAP(("Rendering 24 bit\n"));
shade_ximage_24(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
+ break;
}
/* drop */
case 32:
diff --git a/src/sse2_cmod.c b/src/sse2_cmod.c
index 612f9eb..c61e3fd 100644
--- a/src/sse2_cmod.c
+++ b/src/sse2_cmod.c
@@ -1,12 +1,17 @@
-/* File: sse2_cmod.c
- * Written and Copyright (C) 2005 by Tres Melton
+/*
+ * Copyright (C) 1997-2006, Michael Jennings
*
- * Permission is hereby granted to Michael Jennings to license this code as
- * he sees fit. I'd prefer the GPL but he will choose the BSD. The debate
- * is moot as this is to become a part of the Eterm project, for which he is
- * the primary author. For users of this code I ask that any modifications
- * be released back into the community but with Michael Jennings chooses the
- * BSD license then that request has no backing in law.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies of the Software, its documentation and marketing & publicity
+ * materials, and acknowledgment shall be given in the documentation, materials
+ * and software packages that this Software was used.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
@@ -15,8 +20,9 @@
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ * Ported from <willem@stack.nl> Willem Monsuwe's original x86/MMX assembly
+ * code by Tres Melton in 2005 and 2006. Anything copyrightable by me is
+ * assigned to the Eterm project and its founder/maintainer: Michael Jennings.
*
* Much inspiration was drawn from the original x86 MMX port written by
* Willem Monsuwe <willem@stack.nl> in pure x86/MMX Assembly. The MMX
@@ -25,6 +31,8 @@
* ensure they worked with various gcc options. Further the code was
* extended to take advantage of the 128 bit xmm registers in SSE2.
*
+ * The imlib2 code in Enlightenment also has a lot to teach on the subject.
+ *
* Manuals used in this port:
* The Gnu Assembler
* http://www.gnu.org/software/binutils/manual/gas-2.9.1/html_mono/as.html
@@ -65,21 +73,7 @@
* assembly code in C functions is the only way to ensure that this code will continue to
* function through a (however unlikely) change. If pure assembly were to be used as the
* original MMX author, Willem Monsuwe, did and the ABI changed then this code would cease
- * to function properly. After examination of Willem's code I'm wondering if he
- * wrote it for GNU/Linux originally. The ENTER and LEAVE macros put all of the
- * parameters on the stack so that they can be accessed by references to the Base_Pointer
- * the way that ANSI C is defined. If he originally wrote this for GNU/Linux then he most
- * likely would have just used the registers instead of unwinding those optimizations
- * manually by pushing them to the stack. And if he explicitly wanted to use the stack
- * then there are parameters to gcc that would have performed those operations for him:
- * -mregparm/-mmemparm. Other gcc options that can tweak with the stack and the number
- * of registers available for function parameters are: -fcall-used/-fcall-saved,
- * -fcaller-saves, -fstack-protector, -fPIC/-fpic, -mno-push-args, etc.. It might be
- * advisable to check for these switches when using the original MMX code and emit a
- * warning if any are enabled. I know that the PIC option trashes the BX register and
- * that both Willem and I use that register. In other words If you do manage to get it
- * to compile & run w/ -fpic it WILL break. On the plus side, you can keep the pieces! :-)
- * On the other hand I could be wrong about everything
+ * to function properly.
*
* In Conclusion:
* Using C functions and inline assembly code should alleviate all of the concerns as the
@@ -126,8 +120,8 @@ void shade_ximage_15_sse2( volatile void *data, volatile int bpl, volatile int w
"jns 3f \n\t"
"2: \n\t" /* Start of the inner loop (pixels 8 at a time --> 8 * 16 = 128bits/xmm register ) */
"movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t" /* Load the 16 bits of the pixel (5 bits for red, 6 bits for green, 5 bits for blue) */
- "movdqu %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */
- "movdqu %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */
+ "movdqa %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */
+ "movdqa %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */
"psrlw $5, %%xmm1 \n\t" /* Packed Shift Right Logical Words */
/* From A64_128bit_Media_Programming (p. 347) */
/* Shifts the blue off of the green color */
@@ -191,8 +185,8 @@ void shade_ximage_15_sse2( volatile void *data, volatile int bpl, volatile int w
"jns 8f \n\t"
"7: \n\t"
"movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t"
- "movdqu %%xmm0, %%xmm1 \n\t"
- "movdqu %%xmm0, %%xmm2 \n\t"
+ "movdqa %%xmm0, %%xmm1 \n\t"
+ "movdqa %%xmm0, %%xmm2 \n\t"
"psrlw $5, %%xmm1 \n\t"
"psrlw $10, %%xmm0 \n\t"
"psllw $11, %%xmm2 \n\t"
@@ -301,8 +295,8 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w
"jns 3f \n\t"
"2: \n\t" /* Start of the inner loop (pixels 8 at a time --> 8 * 16 = 128bits/xmm register ) */
"movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t" /* Load the 16 bits of the pixel (5 bits for red, 6 bits for green, 5 bits for blue) */
- "movdqu %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */
- "movdqu %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */
+ "movdqa %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */
+ "movdqa %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */
"psrlw $5, %%xmm1 \n\t" /* Packed Shift Right Logical Words */
/* From A64_128bit_Media_Programming (p. 347) */
/* Shifts the blue off of the green color */
@@ -359,7 +353,7 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w
"pcmpeqw %%xmm3, %%xmm3 \n\t" /* Packed Compare Equal Words */
/* From A64_128bit_Media_Programming (p. 276) */
/* This sets xmm3 to 128 1's (since mm6 = mm6) */
- "movdqu %%xmm3, %%xmm4 \n\t" /* Make copy of 128 ones */
+ "movdqa %%xmm3, %%xmm4 \n\t" /* Make copy of 128 ones */
"psllw $5, %%xmm3 \n\t" /* xmm3 = 8 copies of 1111 1111 1110 0000 */
"psllw $6, %%xmm4 \n\t" /* xmm4 = 8 copies of 1111 1111 1100 0000 */
"6: \n\t"
@@ -368,8 +362,8 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w
"jns 8f \n\t"
"7: \n\t"
"movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t"
- "movdqu %%xmm0, %%xmm1 \n\t"
- "movdqu %%xmm0, %%xmm2 \n\t"
+ "movdqa %%xmm0, %%xmm1 \n\t"
+ "movdqa %%xmm0, %%xmm2 \n\t"
"psrlw $5, %%xmm1 \n\t"
"psrlw $11, %%xmm0 \n\t"
"psllw $11, %%xmm2 \n\t"
@@ -480,7 +474,7 @@ void shade_ximage_32_sse2( volatile void *data, volatile int bpl, volatile int w
"psllw $15, %%xmm6 \n\t" /* Packed Shift Left Logical Words */
/* From A64_128bit_Media_Programming (p. 330) */
/* This sets 8 16 bit values of 1000 0000 0000 0000 in the 128 bit word */
- "movdqu %%xmm6, %%xmm5 \n\t" /* Copy mm6 to mm5 (we need mm6 later) */
+ "movdqa %%xmm6, %%xmm5 \n\t" /* Copy mm6 to mm5 (we need mm6 later) */
"pmulhw %%xmm4, %%xmm5 \n\t" /* Packed Multiply High Signed Word */
/* mm4 = ( mm4 * mm5 ) >> 16 (8 times, once for each 16bit value) */
/* For each color_ modifier (cm) */