summary | refs | log | tree | commit | diff
path: root/liboil/simdpack
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org> 2005-01-09 23:43:20 +0000
committer: David Schleef <ds@schleef.org> 2005-01-09 23:43:20 +0000
commit: a3964318ce4288213110f15b6675289f0a11459c (patch)
tree: 4ba7e0ac735956088e8059bcd2980bdf0f72d03e /liboil/simdpack
parent: 99f22dbf543dcc531dc2b1118551d51a227059e2 (diff)
download: liboil-a3964318ce4288213110f15b6675289f0a11459c.tar.gz
* examples/uberopt/uberopt.c: (main): Use gsize
* liboil/colorspace/rgb2bgr_powerpc.c: (rgb2bgr_ppc), (rgb2bgr_ppc2): Fix powerpc asm
* liboil/colorspace/rgb2rgba_powerpc.c: (rgb2rgba_ppc): same
* liboil/conv/conv_powerpc.c: (_sl_clipconv_S8_F32__powerpc_altivec): same
* liboil/copy/copy_powerpc.c: (copy_u8_altivec), (copy_u8_altivec2), (copy_u8_altivec3): same
* liboil/liboilfunction.c: (oil_impl_is_runnable), (oil_class_optimize): add is_runnable() function
* liboil/liboilfunction.h: same
* liboil/simdpack/abs_powerpc.c: (abs_u16_s16_a16_altivec): fix asm
* liboil/simdpack/clip_powerpc.c: (clip_s16_ppcasm), (clip_s16_ppcasm2), (clip_s16_ppcasm3): same
* liboil/simdpack/mix_powerpc.c: (mix_u8_a16_altivec): same
* testsuite/copy.c: (main): use oil_impl_is_runnable()
Diffstat (limited to 'liboil/simdpack')
-rw-r--r-- liboil/simdpack/abs_powerpc.c 14
-rw-r--r-- liboil/simdpack/clip_powerpc.c 84
-rw-r--r-- liboil/simdpack/mix_powerpc.c 46
3 files changed, 72 insertions, 72 deletions
diff --git a/liboil/simdpack/abs_powerpc.c b/liboil/simdpack/abs_powerpc.c
index 1fe0928..a95ec9a 100644
--- a/liboil/simdpack/abs_powerpc.c
+++ b/liboil/simdpack/abs_powerpc.c
@@ -47,14 +47,14 @@ abs_u16_s16_a16_altivec (uint16_t * dest, int dstr, int16_t * src, int sstr,
}
n /= 8;
__asm__ __volatile__ ("\n"
- " li %%r10, 0 \n"
- " vxor %%v2, %%v2, %%v2 \n"
+ " li r10, 0 \n"
+ " vxor v2, v2, v2 \n"
" mtctr %2 \n"
- "1: lvx %%v0,%%r10,%1 \n"
- " vsubshs %%v1, %%v2, %%v0 \n"
- " vmaxsh %%v1, %%v1, %%v0 \n"
- " stvx %%v1,%%r10,%0 \n"
- " addi %%r10, %%r10, 16 \n"
+ "1: lvx v0,r10,%1 \n"
+ " vsubshs v1, v2, v0 \n"
+ " vmaxsh v1, v1, v0 \n"
+ " stvx v1,r10,%0 \n"
+ " addi r10, r10, 16 \n"
" bdnz 1b \n":"+b" (dest), "+b" (src), "+b" (n)
::"10", "ctr");
}
diff --git a/liboil/simdpack/clip_powerpc.c b/liboil/simdpack/clip_powerpc.c
index dcd3055..b122e4b 100644
--- a/liboil/simdpack/clip_powerpc.c
+++ b/liboil/simdpack/clip_powerpc.c
@@ -37,19 +37,19 @@ clip_s16_ppcasm (int16_t *dest, int dstr, int16_t *src, int sstr, int n,
int16_t *low, int16_t *hi)
{
__asm__ __volatile__(
- " li 7, 0 \n"
+ " li r7, 0 \n"
" mtctr %4 \n"
- "1: lhax 9,7,%1 \n"
- " add 10,9,%2 \n"
- " subfc 8,9,%3 \n"
- " srawi 0,10,31 \n"
- " srawi 11,8,31 \n"
- " and 0,0,10 \n"
- " and 11,11,8 \n"
- " subf 9,0,9 \n"
- " add 9,9,11 \n"
- " sthx 9,7,%0 \n"
- " addi 7,7,2 \n"
+ "1: lhax r9,r7,%1 \n"
+ " add r10,r9,%2 \n"
+ " subfc r8,r9,%3 \n"
+ " srawi r0,r10,31 \n"
+ " srawi r11,r8,31 \n"
+ " and r0,r0,r10 \n"
+ " and r11,r11,r8 \n"
+ " subf r9,r0,r9 \n"
+ " add r9,r9,r11 \n"
+ " sthx r9,r7,%0 \n"
+ " addi r7,r7,2 \n"
" bdnz 1b \n"
:
: "b" (dest), "b" (src), "b" (-*low), "b" (*hi), "b" (n)
@@ -65,16 +65,16 @@ clip_s16_ppcasm2 (int16_t *dest, int dstr, int16_t *src, int sstr, int n,
dest--;
__asm__ __volatile__(
" mtctr %4 \n"
- "1: lhau 9,2(%1) \n"
- " add 10,9,%2 \n"
- " subfc 8,9,%3 \n"
- " srawi 0,10,31 \n"
- " srawi 11,8,31 \n"
- " and 0,0,10 \n"
- " and 11,11,8 \n"
- " subf 9,0,9 \n"
- " add 9,9,11 \n"
- " sthu 9,2(%0) \n"
+ "1: lhau r9,2(%1) \n"
+ " add r10,r9,%2 \n"
+ " subfc r8,r9,%3 \n"
+ " srawi r0,r10,31 \n"
+ " srawi r11,r8,31 \n"
+ " and r0,r0,r10 \n"
+ " and r11,r11,r8 \n"
+ " subf r9,r0,r9 \n"
+ " add r9,r9,r11 \n"
+ " sthu r9,2(%0) \n"
" bdnz 1b \n"
:
: "b" (dest), "b" (src), "b" (-*low), "b" (*hi), "b" (n)
@@ -94,26 +94,26 @@ clip_s16_ppcasm3 (int16_t *dest, int dstr, int16_t *src, int sstr, int n,
n/=2;
__asm__ __volatile__("\n"
" mtctr %4 \n"
- "1: lhau 9,2(%1) \n"
- " add 10,9,%2 \n"
- " lhau 19,2(%1) \n"
- " subfc 8,9,%3 \n"
- " add 20,19,%2 \n"
- " srawi 0,10,31 \n"
- " subfc 18,19,%3 \n"
- " srawi 11,8,31 \n"
- " srawi 22,20,31 \n"
- " and 0,0,10 \n"
- " srawi 21,18,31 \n"
- " and 11,11,8 \n"
- " and 22,22,20 \n"
- " subf 9,0,9 \n"
- " and 21,21,18 \n"
- " add 9,9,11 \n"
- " subf 19,22,19 \n"
- " sthu 9,2(%0) \n"
- " add 19,19,21 \n"
- " sthu 19,2(%0) \n"
+ "1: lhau r9,2(%1) \n"
+ " add r10,r9,%2 \n"
+ " lhau r19,2(%1) \n"
+ " subfc r8,r9,%3 \n"
+ " add r20,r19,%2 \n"
+ " srawi r0,r10,31 \n"
+ " subfc r18,r19,%3 \n"
+ " srawi r11,r8,31 \n"
+ " srawi r22,r20,31 \n"
+ " and r0,r0,r10 \n"
+ " srawi r21,r18,31 \n"
+ " and r11,r11,r8 \n"
+ " and r22,r22,r20 \n"
+ " subf r9,r0,r9 \n"
+ " and r21,r21,r18 \n"
+ " add r9,r9,r11 \n"
+ " subf r19,r22,r19 \n"
+ " sthu r9,2(%0) \n"
+ " add r19,r19,r21 \n"
+ " sthu r19,2(%0) \n"
" bdnz 1b \n"
:
: "b" (dest), "b" (src), "b" (-*low), "b" (*hi), "b" (n)
diff --git a/liboil/simdpack/mix_powerpc.c b/liboil/simdpack/mix_powerpc.c
index 21b4b54..16e27c6 100644
--- a/liboil/simdpack/mix_powerpc.c
+++ b/liboil/simdpack/mix_powerpc.c
@@ -46,33 +46,33 @@ static void mix_u8_a16_altivec(uint8_t *dest, uint8_t *src1, uint8_t *src2, uint
}
__asm__ __volatile__("\n"
- " li %%r12, 0 \n"
- " vxor %%v3, %%v3, %%v3 \n"
- " lvx %%v12, %4, %%r12 \n"
+ " li r12, 0 \n"
+ " vxor v3, v3, v3 \n"
+ " lvx v12, %4, r12 \n"
" mtctr %5 \n"
- "1: lvx %%v0, %1, %%r12 \n"
- " lvx %%v1, %2, %%r12 \n"
- " lvx %%v2, %3, %%r12 \n"
+ "1: lvx v0, %1, r12 \n"
+ " lvx v1, %2, r12 \n"
+ " lvx v2, %3, r12 \n"
- " vmrghb %%v4, %%v3, %%v0 \n"
- " vmrghb %%v5, %%v3, %%v1 \n"
- " vmrghb %%v6, %%v3, %%v2 \n"
- " vmrghb %%v7, %%v1, %%v3 \n"
- " vsubuhs %%v8, %%v7, %%v5 \n"
- " vsubshs %%v9, %%v4, %%v5 \n"
- " vmladduhm %%v10, %%v9, %%v6, %%v8 \n"
+ " vmrghb v4, v3, v0 \n"
+ " vmrghb v5, v3, v1 \n"
+ " vmrghb v6, v3, v2 \n"
+ " vmrghb v7, v1, v3 \n"
+ " vsubuhs v8, v7, v5 \n"
+ " vsubshs v9, v4, v5 \n"
+ " vmladduhm v10, v9, v6, v8 \n"
- " vmrglb %%v4, %%v3, %%v0 \n"
- " vmrglb %%v5, %%v3, %%v1 \n"
- " vmrglb %%v6, %%v3, %%v2 \n"
- " vmrglb %%v7, %%v1, %%v3 \n"
- " vsubuhs %%v8, %%v7, %%v5 \n"
- " vsubshs %%v9, %%v4, %%v5 \n"
- " vmladduhm %%v11, %%v9, %%v6, %%v8 \n"
+ " vmrglb v4, v3, v0 \n"
+ " vmrglb v5, v3, v1 \n"
+ " vmrglb v6, v3, v2 \n"
+ " vmrglb v7, v1, v3 \n"
+ " vsubuhs v8, v7, v5 \n"
+ " vsubshs v9, v4, v5 \n"
+ " vmladduhm v11, v9, v6, v8 \n"
- " vperm %%v0, %%v10, %%v11, %%v12 \n"
- " stvx %%v0, %0, %%r12 \n"
- " addi %%r12, %%r12, 16 \n"
+ " vperm v0, v10, v11, v12 \n"
+ " stvx v0, %0, r12 \n"
+ " addi r12, r12, 16 \n"
" bdnz 1b \n"
: "+b" (dest), "+b" (src1), "+b" (src2), "+b" (scale), "+b" (perm)
: "r" (n/16)