summary | refs | log | tree | commit | diff
path: root/liboil/conv/conv_3dnow.c
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org> 2005-01-08 06:01:10 +0000
committer: David Schleef <ds@schleef.org> 2005-01-08 06:01:10 +0000
commit: 4d44fde5765f6ad78053a29a1f514b2077cfd60a (patch)
tree: 964b4f47f28ecf07a821940daa6d4a4be5923f2e /liboil/conv/conv_3dnow.c
parent: 9e7d1e7c99c7d238cf2d87db12321349d5f4aeb5 (diff)
download: liboil-4d44fde5765f6ad78053a29a1f514b2077cfd60a.tar.gz
* README: add some stuff to read
* configure.ac: check for ieee754.h header
* liboil/colorspace/argb_paint.c: (argb_paint_u8_ref), (argb_paint_u8_fast): change algorithm to be more accurate
* liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx): same
* liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow), (conv_s32_f32_3dnow): make these implementations actually work
* liboil/conv/conv_bitstuff.c: compile fixes
* liboil/conv/conv_misc.c: (conv_f64_s16_table), (conv_f32_s16_table): add a f32 impl
* liboil/simdpack/diffsquaresum_f64.c: (diffsquaresum_f64_ref), (diffsquaresum_f64_i10_simple), (diffsquaresum_f64_i10_fast): fix implementation
Diffstat (limited to 'liboil/conv/conv_3dnow.c')
-rw-r--r-- liboil/conv/conv_3dnow.c 43
1 files changed, 25 insertions, 18 deletions
diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c
index 54da52f..af7df49 100644
--- a/liboil/conv/conv_3dnow.c
+++ b/liboil/conv/conv_3dnow.c
@@ -34,45 +34,52 @@
#ifdef __GNUC__
/* suboptimal */
-static void conv_f32_s32_3dnow(float *dst, int dst_stride, int32_t *src, int src_stride, int n)
+static void
+conv_f32_s16_3dnow(float *dst, int dst_stride, int16_t *src, int src_stride,
+ int n)
{
int i;
- if (n & 1)
- *dst++ = (float) *src++;
- n /= 2;
-
for(i=0;i<n;i++){
asm volatile(
- " pi2fd 0(%0), %%mm0 \n"
- " movq %%mm0, 0(%1) \n"
+ " xor %%eax, %%eax \n"
+ " movw 0(%0), %%eax \n"
+ " movd %%eax, %%mm0 \n"
+ " pi2fd 0(%0), %%mm0 \n"
+ " movd %%mm0, 0(%1) \n"
:
- : "a" (src), "c" (dst)
- : "mm0"
+ : "r" (src), "r" (dst)
+ : "eax", "mm0"
);
dst = OIL_OFFSET(dst, dst_stride);
src = OIL_OFFSET(src, src_stride);
}
asm volatile ("emms");
}
-OIL_DEFINE_IMPL_FULL(conv_f32_s32_3dnow, conv_f32_s32,
+OIL_DEFINE_IMPL_FULL(conv_f32_s16_3dnow, conv_f32_s16,
OIL_IMPL_FLAG_3DNOW);
/* suboptimal */
-static void conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride, int n)
+static void
+conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride,
+ int n)
{
int i;
-
- if (n & 1)
- *src++ = (int32_t) *dst++;
- n /= 2;
+ const float constants[][2] = {
+ { -0.5, -0.5 },
+ { -1.0, -1.0 }
+ };
for(i=0;i<n;i++){
asm volatile(
- " pf2id 0(%0), %%mm0 \n"
- " movq %%mm0, 0(%1) \n"
+ " movq 0(%0), %%mm0 \n"
+ " pfadd 0(%2), %%mm0 \n"
+ " pf2id %%mm0, %%mm1 \n"
+ " pfcmpgt 0(%2), %%mm0 \n"
+ " paddd %%mm0, %%mm1 \n"
+ " movd %%mm1, 0(%1) \n"
:
- : "a" (src), "c" (dst)
+ : "r" (src), "r" (dst), "r" (constants)
: "mm0"
);
dst = OIL_OFFSET(dst, dst_stride);