summary | refs | log | tree | commit | diff
path: root/liboil/simdpack
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org> 2005-08-15 21:33:39 +0000
committer: David Schleef <ds@schleef.org> 2005-08-15 21:33:39 +0000
commit: ec572e49fb3423767ab7d562c5ef0aa2ad4ff38b (patch)
tree: 2aa40503eca7623296b431e79d22e6e4720e2be8 /liboil/simdpack
parent: 1ef601312c0634d55f1a098be769eae436dd0b92 (diff)
download: liboil-ec572e49fb3423767ab7d562c5ef0aa2ad4ff38b.tar.gz
* configure.ac: Add some altivec theora code
* liboil/Makefile.am:
* liboil/powerpc/Makefile.am:
* liboil/powerpc/fdct8x8theora_altivec.c: (fdct8x8theora_altivec):
* liboil/powerpc/recon8x8_altivec.c: (recon8x8_intra_altivec), (recon8x8_inter_altivec), (recon8x8_inter2_altivec):
* liboil/powerpc/recon8x8_ppc.c: (recon8x8_intra_ppc), (recon8x8_inter_ppc), (recon8x8_inter2_ppc):
* liboil/colorspace/composite.c: Fix bug in ADD operator.
* liboil/dct/fdct8x8theora_i386.c:
* liboil/simdpack/average2_u8.c: (average2_u8_trick), (average2_u8_unroll4): Fix n%4!=0 problems noticed by thomasvs.
* liboil/simdpack/scalarmult_i386.c: (scalarmult_f32_sse): Fix n%4!=0 problems.
* testsuite/stride.c: (main): use a random n to test possible endpoint problems.
Diffstat (limited to 'liboil/simdpack')
-rw-r--r-- liboil/simdpack/average2_u8.c | 80
-rw-r--r-- liboil/simdpack/scalarmult_i386.c | 6
2 files changed, 26 insertions, 60 deletions
diff --git a/liboil/simdpack/average2_u8.c b/liboil/simdpack/average2_u8.c
index fe07aa8..3ac8c6a 100644
--- a/liboil/simdpack/average2_u8.c
+++ b/liboil/simdpack/average2_u8.c
@@ -57,35 +57,27 @@ average2_u8_trick (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
{
unsigned int x, y, d;
-#if 0
- if (sstr1 == 1 && sstr2 == 1 && dstr == 1) {
- while (n > 0) {
- x = *(unsigned int *) src1;
- y = *(unsigned int *) src2;
- *(unsigned int *) dest = (((x ^ y) & 0xfefefefe) >> 1) + (x & y);
- src1 += 4;
- src2 += 4;
- dest += 4;
- n -= 4;
- }
- } else
-#endif
- {
- while (n > 0) {
- x = (src1[0] << 24) | (src1[sstr1] << 16) | (src1[2 *
- sstr1] << 8) | (src1[3 * sstr1]);
- y = (src2[0] << 24) | (src2[sstr2] << 16) | (src2[2 *
- sstr2] << 8) | (src2[3 * sstr2]);
- d = (((x ^ y) & 0xfefefefe) >> 1) + (x & y);
- dest[0] = (d >> 24);
- dest[1*dstr] = (d >> 16);
- dest[2*dstr] = (d >> 8);
- dest[3*dstr] = (d >> 0);
- src1 += 4 * sstr1;
- src2 += 4 * sstr2;
- dest += 4 * dstr;
- n -= 4;
- }
+ while (n&3) {
+ *dest = (*src1 + *src2) >> 1;
+ src1 += sstr1;
+ src2 += sstr2;
+ dest += dstr;
+ n--;
+ }
+ while (n > 0) {
+ x = (src1[0] << 24) | (src1[sstr1] << 16) | (src1[2 *
+ sstr1] << 8) | (src1[3 * sstr1]);
+ y = (src2[0] << 24) | (src2[sstr2] << 16) | (src2[2 *
+ sstr2] << 8) | (src2[3 * sstr2]);
+ d = (((x ^ y) & 0xfefefefe) >> 1) + (x & y);
+ dest[0] = (d >> 24);
+ dest[1*dstr] = (d >> 16);
+ dest[2*dstr] = (d >> 8);
+ dest[3*dstr] = (d >> 0);
+ src1 += 4 * sstr1;
+ src2 += 4 * sstr2;
+ dest += 4 * dstr;
+ n -= 4;
}
}
@@ -112,6 +104,7 @@ average2_u8_unroll4 (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
{
while (n & 0x3) {
*dest = (*src1 + *src2) >> 1;
+ dest += dstr;
src1 += sstr1;
src2 += sstr2;
n--;
@@ -139,32 +132,3 @@ average2_u8_unroll4 (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
OIL_DEFINE_IMPL (average2_u8_unroll4, average2_u8);
-#if 0 /* doesn't compile */
-#ifdef HAVE_CPU_I386
-/* This doesn't work with sstr!=2 or dstr!=2 */
-static void
-average2_u8_i386asm (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
- uint8_t *src2, int sstr2, int n)
-{
- __asm__ __volatile__ ("\n"
- " .p2align 4,,15 \n"
- "1: movzbl (%%ebx), %%eax \n"
- " addl $2, %%ebx \n"
- " movzbl (%%ecx), %%edx \n"
- " addl $2, %%ecx \n"
- " leal 1(%%edx, %%eax), %%eax \n"
- " sarl $1, %%eax \n"
- " movb %%al, 0(%%esi) \n"
- " incl %%esi \n"
- " decl %%edi \n"
- " jg 1b \n":"+b"
- (src1), "+c" (src2), "+D" (n), "+S" (dest)
- ::"eax", "edx");
-
-}
-
-OIL_DEFINE_IMPL (average2_u8_i386asm, average2_u8);
-#endif
-#endif
-
-
diff --git a/liboil/simdpack/scalarmult_i386.c b/liboil/simdpack/scalarmult_i386.c
index 232dc83..bade779 100644
--- a/liboil/simdpack/scalarmult_i386.c
+++ b/liboil/simdpack/scalarmult_i386.c
@@ -48,7 +48,7 @@ scalarmult_f32_sse (float *dest, int dstr, float *src, int sstr,
" movss (%0), %%xmm1 \n"
:
: "r" (t));
- for(i=0;i<n;i+=4) {
+ for(i=0;i<n-3;i+=4) {
t[0] = OIL_GET(src,sstr*(i + 0), float);
t[1] = OIL_GET(src,sstr*(i + 1), float);
t[2] = OIL_GET(src,sstr*(i + 2), float);
@@ -64,7 +64,9 @@ scalarmult_f32_sse (float *dest, int dstr, float *src, int sstr,
OIL_GET(dest,dstr*(i + 2), float) = t[2];
OIL_GET(dest,dstr*(i + 3), float) = t[3];
}
-
+ for(;i<n;i++){
+ OIL_GET(dest,dstr*i, float) = *val * OIL_GET(src,sstr*i, float);
+ }
}
OIL_DEFINE_IMPL_FULL (scalarmult_f32_sse, scalarmult_f32, OIL_IMPL_FLAG_SSE);