diff options
author | David Schleef <ds@schleef.org> | 2005-01-08 06:01:10 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-01-08 06:01:10 +0000 |
commit | 4d44fde5765f6ad78053a29a1f514b2077cfd60a (patch) | |
tree | 964b4f47f28ecf07a821940daa6d4a4be5923f2e /liboil/conv/conv_3dnow.c | |
parent | 9e7d1e7c99c7d238cf2d87db12321349d5f4aeb5 (diff) | |
download | liboil-4d44fde5765f6ad78053a29a1f514b2077cfd60a.tar.gz |
* README: add some stuff to read
* configure.ac: check for ieee754.h header
* liboil/colorspace/argb_paint.c: (argb_paint_u8_ref),
(argb_paint_u8_fast): change algorithm to be more accurate
* liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx): same change (more accurate algorithm)
* liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow),
(conv_s32_f32_3dnow): make these implementations actually work
* liboil/conv/conv_bitstuff.c: compile fixes
* liboil/conv/conv_misc.c: (conv_f64_s16_table),
(conv_f32_s16_table): add an f32 impl
* liboil/simdpack/diffsquaresum_f64.c: (diffsquaresum_f64_ref),
(diffsquaresum_f64_i10_simple), (diffsquaresum_f64_i10_fast):
fix implementation
Diffstat (limited to 'liboil/conv/conv_3dnow.c')
-rw-r--r-- | liboil/conv/conv_3dnow.c | 43 |
1 files changed, 25 insertions, 18 deletions
```diff
diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c
index 54da52f..af7df49 100644
--- a/liboil/conv/conv_3dnow.c
+++ b/liboil/conv/conv_3dnow.c
@@ -34,45 +34,52 @@
 #ifdef __GNUC__
 /* suboptimal */
-static void conv_f32_s32_3dnow(float *dst, int dst_stride, int32_t *src, int src_stride, int n)
+static void
+conv_f32_s16_3dnow(float *dst, int dst_stride, int16_t *src, int src_stride,
+    int n)
 {
   int i;
-  if (n & 1)
-    *dst++ = (float) *src++;
-  n /= 2;
-
   for(i=0;i<n;i++){
     asm volatile(
-      " pi2fd 0(%0), %%mm0 \n"
-      " movq %%mm0, 0(%1) \n"
+      " xor %%eax, %%eax \n"
+      " movw 0(%0), %%eax \n"
+      " movd %%eax, %%mm0 \n"
+      " pi2fd 0(%0), %%mm0 \n"
+      " movd %%mm0, 0(%1) \n"
     :
-    : "a" (src), "c" (dst)
-    : "mm0"
+    : "r" (src), "r" (dst)
+    : "eax", "mm0"
     );
     dst = OIL_OFFSET(dst, dst_stride);
     src = OIL_OFFSET(src, src_stride);
   }
   asm volatile ("emms");
 }
-OIL_DEFINE_IMPL_FULL(conv_f32_s32_3dnow, conv_f32_s32,
+OIL_DEFINE_IMPL_FULL(conv_f32_s16_3dnow, conv_f32_s16,
     OIL_IMPL_FLAG_3DNOW);
 /* suboptimal */
-static void conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride, int n)
+static void
+conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride,
+    int n)
 {
   int i;
-
-  if (n & 1)
-    *src++ = (int32_t) *dst++;
-  n /= 2;
+  const float constants[][2] = {
+    { -0.5, -0.5 },
+    { -1.0, -1.0 }
+  };
   for(i=0;i<n;i++){
     asm volatile(
-      " pf2id 0(%0), %%mm0 \n"
-      " movq %%mm0, 0(%1) \n"
+      " movq 0(%0), %%mm0 \n"
+      " pfadd 0(%2), %%mm0 \n"
+      " pf2id %%mm0, %%mm1 \n"
+      " pfcmpgt 0(%2), %%mm0 \n"
+      " paddd %%mm0, %%mm1 \n"
+      " movd %%mm1, 0(%1) \n"
     :
-    : "a" (src), "c" (dst)
+    : "r" (src), "r" (dst), "r" (constants)
     : "mm0"
     );
     dst = OIL_OFFSET(dst, dst_stride);
```