diff options
author | David Schleef <ds@schleef.org> | 2005-08-15 21:33:39 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-08-15 21:33:39 +0000 |
commit | ec572e49fb3423767ab7d562c5ef0aa2ad4ff38b (patch) | |
tree | 2aa40503eca7623296b431e79d22e6e4720e2be8 /liboil/simdpack | |
parent | 1ef601312c0634d55f1a098be769eae436dd0b92 (diff) | |
download | liboil-ec572e49fb3423767ab7d562c5ef0aa2ad4ff38b.tar.gz |
* configure.ac: Add some AltiVec Theora code.
* liboil/Makefile.am:
* liboil/powerpc/Makefile.am:
* liboil/powerpc/fdct8x8theora_altivec.c: (fdct8x8theora_altivec):
* liboil/powerpc/recon8x8_altivec.c: (recon8x8_intra_altivec),
(recon8x8_inter_altivec), (recon8x8_inter2_altivec):
* liboil/powerpc/recon8x8_ppc.c: (recon8x8_intra_ppc),
(recon8x8_inter_ppc), (recon8x8_inter2_ppc):
* liboil/colorspace/composite.c: Fix bug in ADD operator.
* liboil/dct/fdct8x8theora_i386.c:
* liboil/simdpack/average2_u8.c: (average2_u8_trick),
(average2_u8_unroll4): Fix n%4!=0 problems noticed by thomasvs.
* liboil/simdpack/scalarmult_i386.c: (scalarmult_f32_sse): Fix
n%4!=0 problems.
* testsuite/stride.c: (main): Use a random n to test possible
endpoint problems.
Diffstat (limited to 'liboil/simdpack')
-rw-r--r-- | liboil/simdpack/average2_u8.c | 80 | ||||
-rw-r--r-- | liboil/simdpack/scalarmult_i386.c | 6 |
2 files changed, 26 insertions, 60 deletions
diff --git a/liboil/simdpack/average2_u8.c b/liboil/simdpack/average2_u8.c index fe07aa8..3ac8c6a 100644 --- a/liboil/simdpack/average2_u8.c +++ b/liboil/simdpack/average2_u8.c @@ -57,35 +57,27 @@ average2_u8_trick (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, { unsigned int x, y, d; -#if 0 - if (sstr1 == 1 && sstr2 == 1 && dstr == 1) { - while (n > 0) { - x = *(unsigned int *) src1; - y = *(unsigned int *) src2; - *(unsigned int *) dest = (((x ^ y) & 0xfefefefe) >> 1) + (x & y); - src1 += 4; - src2 += 4; - dest += 4; - n -= 4; - } - } else -#endif - { - while (n > 0) { - x = (src1[0] << 24) | (src1[sstr1] << 16) | (src1[2 * - sstr1] << 8) | (src1[3 * sstr1]); - y = (src2[0] << 24) | (src2[sstr2] << 16) | (src2[2 * - sstr2] << 8) | (src2[3 * sstr2]); - d = (((x ^ y) & 0xfefefefe) >> 1) + (x & y); - dest[0] = (d >> 24); - dest[1*dstr] = (d >> 16); - dest[2*dstr] = (d >> 8); - dest[3*dstr] = (d >> 0); - src1 += 4 * sstr1; - src2 += 4 * sstr2; - dest += 4 * dstr; - n -= 4; - } + while (n&3) { + *dest = (*src1 + *src2) >> 1; + src1 += sstr1; + src2 += sstr2; + dest += dstr; + n--; + } + while (n > 0) { + x = (src1[0] << 24) | (src1[sstr1] << 16) | (src1[2 * + sstr1] << 8) | (src1[3 * sstr1]); + y = (src2[0] << 24) | (src2[sstr2] << 16) | (src2[2 * + sstr2] << 8) | (src2[3 * sstr2]); + d = (((x ^ y) & 0xfefefefe) >> 1) + (x & y); + dest[0] = (d >> 24); + dest[1*dstr] = (d >> 16); + dest[2*dstr] = (d >> 8); + dest[3*dstr] = (d >> 0); + src1 += 4 * sstr1; + src2 += 4 * sstr2; + dest += 4 * dstr; + n -= 4; } } @@ -112,6 +104,7 @@ average2_u8_unroll4 (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, { while (n & 0x3) { *dest = (*src1 + *src2) >> 1; + dest += dstr; src1 += sstr1; src2 += sstr2; n--; @@ -139,32 +132,3 @@ average2_u8_unroll4 (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, OIL_DEFINE_IMPL (average2_u8_unroll4, average2_u8); -#if 0 /* doesn't compile */ -#ifdef HAVE_CPU_I386 -/* This doesn't work with sstr!=2 or dstr!=2 */ -static void 
-average2_u8_i386asm (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, - uint8_t *src2, int sstr2, int n) -{ - __asm__ __volatile__ ("\n" - " .p2align 4,,15 \n" - "1: movzbl (%%ebx), %%eax \n" - " addl $2, %%ebx \n" - " movzbl (%%ecx), %%edx \n" - " addl $2, %%ecx \n" - " leal 1(%%edx, %%eax), %%eax \n" - " sarl $1, %%eax \n" - " movb %%al, 0(%%esi) \n" - " incl %%esi \n" - " decl %%edi \n" - " jg 1b \n":"+b" - (src1), "+c" (src2), "+D" (n), "+S" (dest) - ::"eax", "edx"); - -} - -OIL_DEFINE_IMPL (average2_u8_i386asm, average2_u8); -#endif -#endif - - diff --git a/liboil/simdpack/scalarmult_i386.c b/liboil/simdpack/scalarmult_i386.c index 232dc83..bade779 100644 --- a/liboil/simdpack/scalarmult_i386.c +++ b/liboil/simdpack/scalarmult_i386.c @@ -48,7 +48,7 @@ scalarmult_f32_sse (float *dest, int dstr, float *src, int sstr, " movss (%0), %%xmm1 \n" : : "r" (t)); - for(i=0;i<n;i+=4) { + for(i=0;i<n-3;i+=4) { t[0] = OIL_GET(src,sstr*(i + 0), float); t[1] = OIL_GET(src,sstr*(i + 1), float); t[2] = OIL_GET(src,sstr*(i + 2), float); @@ -64,7 +64,9 @@ scalarmult_f32_sse (float *dest, int dstr, float *src, int sstr, OIL_GET(dest,dstr*(i + 2), float) = t[2]; OIL_GET(dest,dstr*(i + 3), float) = t[3]; } - + for(;i<n;i++){ + OIL_GET(dest,dstr*i, float) = *val * OIL_GET(src,sstr*i, float); + } } OIL_DEFINE_IMPL_FULL (scalarmult_f32_sse, scalarmult_f32, OIL_IMPL_FLAG_SSE); |