diff options
author | David Schleef <ds@schleef.org> | 2005-01-08 06:01:10 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-01-08 06:01:10 +0000 |
commit | 4d44fde5765f6ad78053a29a1f514b2077cfd60a (patch) | |
tree | 964b4f47f28ecf07a821940daa6d4a4be5923f2e | |
parent | 9e7d1e7c99c7d238cf2d87db12321349d5f4aeb5 (diff) | |
download | liboil-4d44fde5765f6ad78053a29a1f514b2077cfd60a.tar.gz |
* README: add some stuff to read
* configure.ac: check for ieee754.h header
* liboil/colorspace/argb_paint.c: (argb_paint_u8_ref),
(argb_paint_u8_fast): change algorithm to be more accurate
* liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx): same
* liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow),
(conv_s32_f32_3dnow): make these implementations actually work
* liboil/conv/conv_bitstuff.c: compile fixes
* liboil/conv/conv_misc.c: (conv_f64_s16_table),
(conv_f32_s16_table): add a f32 impl
* liboil/simdpack/diffsquaresum_f64.c: (diffsquaresum_f64_ref),
(diffsquaresum_f64_i10_simple), (diffsquaresum_f64_i10_fast):
fix implementation
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | README | 52 | ||||
-rw-r--r-- | configure.ac | 3 | ||||
-rw-r--r-- | liboil/colorspace/argb_paint.c | 20 | ||||
-rw-r--r-- | liboil/colorspace/argb_paint_i386.c | 16 | ||||
-rw-r--r-- | liboil/conv/conv_3dnow.c | 43 | ||||
-rw-r--r-- | liboil/conv/conv_bitstuff.c | 21 | ||||
-rw-r--r-- | liboil/conv/conv_misc.c | 40 | ||||
-rw-r--r-- | liboil/simdpack/diffsquaresum_f64.c | 25 |
9 files changed, 182 insertions, 54 deletions
@@ -1,3 +1,19 @@ +2005-01-07 David Schleef <ds@schleef.org> + + * README: add some stuff to read + * configure.ac: check for ieee754.h header + * liboil/colorspace/argb_paint.c: (argb_paint_u8_ref), + (argb_paint_u8_fast): change algorithm to be more accurate + * liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx): same + * liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow), + (conv_s32_f32_3dnow): make these implementations actually work + * liboil/conv/conv_bitstuff.c: compile fixes + * liboil/conv/conv_misc.c: (conv_f64_s16_table), + (conv_f32_s16_table): add a f32 impl + * liboil/simdpack/diffsquaresum_f64.c: (diffsquaresum_f64_ref), + (diffsquaresum_f64_i10_simple), (diffsquaresum_f64_i10_fast): + fix implementation + 2005-01-05 David Schleef <ds@schleef.org> * testsuite/stride.c: (main), (hist): new test @@ -1,15 +1,53 @@ -Ideas: +ABI warning +=========== -implementations should be flagged as to whether or not they contain -assembly, and also whether or not they are the product of the -alternate optimization. This way, testing code can automatically -flag functions where the alternate optimization is faster than the -C or asm code, indicating that the compiler output should be turned -into assembly. +In general, liboil is not ABI stable. However, portions of liboil +are guaranteed to be stable through the 0.3.x series, and using +a compatibility library, through the 0.4.x series as well. This +policy is designed to provide almost all applications with ABI +stability for the symbols most used in liboil. +Any symbols declared by including <liboil/liboil.h> follow this +ABI policy. This includes all liboil function classes as well +as a limited amount of core functionality, such as oil_init(). + +ABI Implementation +================== + +The liboil-0.3.x series creates the shared library liboil-0.3.so. +Applications that use liboil are linked against this library, and +will load liboil-0.3.so at runtime. 
+ +The liboil-0.4.x series will create two shared libraries, +liboil-0.4.so and a compatibility library liboil-0.3.so that +implements functionality removed in the 0.3->0.4 transition. +The 0.3 shared library will be binary compatible with the +liboil-0.3.x releases. Thus, applications compiled with a +liboil-0.3.x release will continue to function after the shared +library is upgraded to a 0.4.x release. Applications compiled +with 0.4.x will use liboil-0.4.so directly. + +Distributions can use this compatibility library in order to +smooth transitions between liboil major releases. For example, +consider the case where AppA depends on libB and libC, and both +libraries use liboil. When liboil-0.4 is released, the distro +can start using it immediately, and AppA will continue to work +correctly. Then, at a convenient time, libB and libC can +independently be recompiled using liboil-0.4.x, and the packages +will no longer depend on liboil-0.3.so. + +The traditional way of dealing with these changes is to +migrate all packages to the new library as quickly as possible. +This is unduly cumbersome. + + + +Random Ideas +============ + Why is trans8x8_f64 so slow on powerpc (compared to trans8x8_u16)? 
diff --git a/configure.ac b/configure.ac index 7429ca8..e63a86e 100644 --- a/configure.ac +++ b/configure.ac @@ -86,6 +86,9 @@ AC_CHECK_LIB(m, lrint, AC_CHECK_LIB(m, lrintf, AC_DEFINE(HAVE_LRINTF, 1, [Define if lrintf() is available])) +AC_CHECK_HEADER(ieee754.h, + AC_DEFINE(HAVE_IEEE754_H, 1, [Define if ieee754.h exists])) + AS_COMPILER_FLAG(-Wall, LIBOIL_CFLAGS="$LIBOIL_CFLAGS -Wall") if test "x$LIBOIL_CVS" = "xyes" then diff --git a/liboil/colorspace/argb_paint.c b/liboil/colorspace/argb_paint.c index 6c7598a..7bbb168 100644 --- a/liboil/colorspace/argb_paint.c +++ b/liboil/colorspace/argb_paint.c @@ -35,8 +35,8 @@ OIL_DEFINE_CLASS (argb_paint_u8, "uint8_t *i_4xn, uint8_t *s1_4, uint8_t *s2_n, int n"); -#define imult(a,b) (((a)*(b) + (((a)*(b)) >> 8))>>8) -#define apply(a,b,c) (imult(a,255-c) + imult(b,c)) +#define div255(x) (((x + 128) + ((x + 128)>>8))>>8) +#define blend(x,y,a) div255((x)*(a) + (y)*(255-(a))) static void argb_paint_u8_ref (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) @@ -44,10 +44,10 @@ argb_paint_u8_ref (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) int i; for(i=0;i<n;i++){ - dest[0] = apply(dest[0],color[0],alpha[0]); - dest[1] = apply(dest[1],color[1],alpha[0]); - dest[2] = apply(dest[2],color[2],alpha[0]); - dest[3] = apply(dest[3],color[3],alpha[0]); + dest[0] = blend(color[0],dest[0],alpha[0]); + dest[1] = blend(color[1],dest[1],alpha[0]); + dest[2] = blend(color[2],dest[2],alpha[0]); + dest[3] = blend(color[3],dest[3],alpha[0]); dest+=4; alpha++; } @@ -68,10 +68,10 @@ argb_paint_u8_fast (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) dest[2] = color[2]; dest[3] = color[3]; } else { - dest[0] = apply(dest[0],color[0],alpha[0]); - dest[1] = apply(dest[1],color[1],alpha[0]); - dest[2] = apply(dest[2],color[2],alpha[0]); - dest[3] = apply(dest[3],color[3],alpha[0]); + dest[0] = blend(color[0],dest[0],alpha[0]); + dest[1] = blend(color[1],dest[1],alpha[0]); + dest[2] = blend(color[2],dest[2],alpha[0]); + dest[3] = 
blend(color[3],dest[3],alpha[0]); } dest+=4; alpha++; diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c index f6785a4..3753589 100644 --- a/liboil/colorspace/argb_paint_i386.c +++ b/liboil/colorspace/argb_paint_i386.c @@ -35,11 +35,12 @@ OIL_DECLARE_CLASS (argb_paint_u8); -#define imult(a,b) (((a)*(b) + (((a)*(b)) >> 8))>>8) -#define apply(a,b,c) (imult(a,255-c) + imult(b,c)) +#define div255(x) (((x + 128) + ((x + 128)>>8))>>8) +#define blend(x,y,a) div255((x)*(a) + (y)*(255-(a))) static short constants[][4] = { - { 255, 255, 255, 255 } + { 255, 255, 255, 255 }, + { 128, 128, 128, 128 } }; static void @@ -55,10 +56,10 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) " movq (%0), %%mm1\n" " punpcklbw %%mm0, %%mm1\n" " movb (%2), %%al\n" - " je 3f\n" + " je 4f\n" " cmpl $255, %1\n" " jne 2f\n" - " movd %%mm3, (%0)\n" + " movq %%mm3, %%mm2\n" " jmp 3f\n" "2:\n" " movd %1, %%mm2\n" @@ -68,13 +69,15 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) " pmullw %%mm1, %%mm4\n" " pmullw %%mm3, %%mm2\n" " paddw %%mm4, %%mm2\n" + " paddw 8(%4), %%mm2\n" " movq %%mm2, %%mm1\n" " psrlw $8, %%mm1\n" " paddw %%mm1, %%mm2\n" " psrlw $8, %%mm2\n" + "3: \n" " packuswb %%mm0, %%mm2\n" " movd %%mm2, (%0)\n" - "3:\n" + "4:\n" " add $4, %0\n" " add $1, %2\n" " decl %3\n" @@ -85,3 +88,4 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) } OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); + diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c index 54da52f..af7df49 100644 --- a/liboil/conv/conv_3dnow.c +++ b/liboil/conv/conv_3dnow.c @@ -34,45 +34,52 @@ #ifdef __GNUC__ /* suboptimal */ -static void conv_f32_s32_3dnow(float *dst, int dst_stride, int32_t *src, int src_stride, int n) +static void +conv_f32_s16_3dnow(float *dst, int dst_stride, int16_t *src, int src_stride, + int n) { int i; - if (n & 1) - *dst++ = (float) 
*src++; - n /= 2; - for(i=0;i<n;i++){ asm volatile( - " pi2fd 0(%0), %%mm0 \n" - " movq %%mm0, 0(%1) \n" + " xor %%eax, %%eax \n" + " movw 0(%0), %%eax \n" + " movd %%eax, %%mm0 \n" + " pi2fd 0(%0), %%mm0 \n" + " movd %%mm0, 0(%1) \n" : - : "a" (src), "c" (dst) - : "mm0" + : "r" (src), "r" (dst) + : "eax", "mm0" ); dst = OIL_OFFSET(dst, dst_stride); src = OIL_OFFSET(src, src_stride); } asm volatile ("emms"); } -OIL_DEFINE_IMPL_FULL(conv_f32_s32_3dnow, conv_f32_s32, +OIL_DEFINE_IMPL_FULL(conv_f32_s16_3dnow, conv_f32_s16, OIL_IMPL_FLAG_3DNOW); /* suboptimal */ -static void conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride, int n) +static void +conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride, + int n) { int i; - - if (n & 1) - *src++ = (int32_t) *dst++; - n /= 2; + const float constants[][2] = { + { -0.5, -0.5 }, + { -1.0, -1.0 } + }; for(i=0;i<n;i++){ asm volatile( - " pf2id 0(%0), %%mm0 \n" - " movq %%mm0, 0(%1) \n" + " movq 0(%0), %%mm0 \n" + " pfadd 0(%2), %%mm0 \n" + " pf2id %%mm0, %%mm1 \n" + " pfcmpgt 0(%2), %%mm0 \n" + " paddd %%mm0, %%mm1 \n" + " movd %%mm1, 0(%1) \n" : - : "a" (src), "c" (dst) + : "r" (src), "r" (dst), "r" (constants) : "mm0" ); dst = OIL_OFFSET(dst, dst_stride); diff --git a/liboil/conv/conv_bitstuff.c b/liboil/conv/conv_bitstuff.c index 82c7b9d..a535bfe 100644 --- a/liboil/conv/conv_bitstuff.c +++ b/liboil/conv/conv_bitstuff.c @@ -29,6 +29,7 @@ #include "config.h" #endif #include <liboil/liboilfunction.h> +#include <conv.h> #ifdef HAVE_IEEE754_H @@ -49,7 +50,7 @@ static void conv_f32_u8_bitstuff(float *dst, int dest_stride, uint8_t *src, OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_f32_u8_bitstuff, conv_f32_u8_class); +OIL_DEFINE_IMPL(conv_f32_u8_bitstuff, conv_f32_u8); static void conv_f32_s8_bitstuff(float *dst, int dest_stride, int8_t *src, int src_stride, int n) @@ -66,7 +67,7 @@ static void conv_f32_s8_bitstuff(float *dst, int dest_stride, int8_t *src, 
OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_f32_s8_bitstuff, conv_f32_s8_class); +OIL_DEFINE_IMPL(conv_f32_s8_bitstuff, conv_f32_s8); static void conv_f32_u16_bitstuff(float *dst, int dest_stride, uint16_t *src, int src_stride, int n) @@ -83,7 +84,7 @@ static void conv_f32_u16_bitstuff(float *dst, int dest_stride, uint16_t *src, OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_f32_u16_bitstuff, conv_f32_u16_class); +OIL_DEFINE_IMPL(conv_f32_u16_bitstuff, conv_f32_u16); static void conv_f32_s16_bitstuff(float *dst, int dest_stride, int16_t *src, int src_stride, int n) @@ -100,7 +101,7 @@ static void conv_f32_s16_bitstuff(float *dst, int dest_stride, int16_t *src, OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_f32_s16_bitstuff, conv_f32_s16_class); +OIL_DEFINE_IMPL(conv_f32_s16_bitstuff, conv_f32_s16); #define signbit_S32(x) (((uint32_t)(x))>>31) @@ -124,7 +125,7 @@ static void conv_s16_f32_bitstuff(int16_t *dst, int dest_stride, float *src, OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_s16_f32_bitstuff, conv_s16_f32_class); +OIL_DEFINE_IMPL(conv_s16_f32_bitstuff, conv_s16_f32); #if 0 @@ -143,7 +144,7 @@ static void conv_f64_u8_bitstuff(float *dst, int dest_stride, uint8_t *src, OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_f64_u8_bitstuff, conv_f64_u8_class); +OIL_DEFINE_IMPL(conv_f64_u8_bitstuff, conv_f64_u8); static void conv_f64_s8_bitstuff(float *dst, int dest_stride, int8_t *src, int src_stride, int n) @@ -160,7 +161,7 @@ static void conv_f64_s8_bitstuff(float *dst, int dest_stride, int8_t *src, OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_f64_s8_bitstuff, conv_f64_s8_class); +OIL_DEFINE_IMPL(conv_f64_s8_bitstuff, conv_f64_s8); static void conv_f64_u16_bitstuff(float *dst, int dest_stride, uint16_t *src, int src_stride, int n) @@ -177,7 +178,7 @@ static void conv_f64_u16_bitstuff(float *dst, int dest_stride, uint16_t *src, OIL_INCREMENT(src, src_stride); } } 
-OIL_DEFINE_IMPL(conv_f64_u16_bitstuff, conv_f64_u16_class); +OIL_DEFINE_IMPL(conv_f64_u16_bitstuff, conv_f64_u16); static void conv_f64_s16_bitstuff(float *dst, int dest_stride, int16_t *src, int src_stride, int n) @@ -194,7 +195,7 @@ static void conv_f64_s16_bitstuff(float *dst, int dest_stride, int16_t *src, OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_f64_s16_bitstuff, conv_f64_s16_class); +OIL_DEFINE_IMPL(conv_f64_s16_bitstuff, conv_f64_s16); #endif /* This implementation is slightly inaccurate */ @@ -225,7 +226,7 @@ static void conv_s16_f64_bitstuff(int16_t *dst, int dest_stride, float *src, OIL_INCREMENT (src, src_stride); } } -OIL_DEFINE_IMPL(conv_s16_f64_bitstuff, conv_s16_f64_class); +OIL_DEFINE_IMPL(conv_s16_f64_bitstuff, conv_s16_f64); #endif diff --git a/liboil/conv/conv_misc.c b/liboil/conv/conv_misc.c index 73898be..b32250e 100644 --- a/liboil/conv/conv_misc.c +++ b/liboil/conv/conv_misc.c @@ -38,6 +38,43 @@ static void conv_f64_s16_table(double *dest, int dest_stride, short *src, int src_stride, int n) { + static double ints_high[256]; + static double ints_low[256]; + static int init = 0; + int i; + unsigned int idx; + if(!init){ + for(i=0;i<256;i++){ + ints_high[i]=256.0*((i<128)?i:i-256); + ints_low[i]=i; + } + init = 1; + } + + if(n&1){ + idx = (unsigned short)*src; + *dest = ints_high[(idx>>8)] + ints_low[(idx&0xff)]; + OIL_INCREMENT(dest, dest_stride); + OIL_INCREMENT(src, src_stride); + n-=1; + } + for(i=0;i<n;i+=2){ + idx = (unsigned short)*src; + *dest = ints_high[(idx>>8)] + ints_low[(idx&0xff)]; + OIL_INCREMENT(dest, dest_stride); + OIL_INCREMENT(src, src_stride); + idx = (unsigned short)*src; + *dest = ints_high[(idx>>8)] + ints_low[(idx&0xff)]; + OIL_INCREMENT(dest, dest_stride); + OIL_INCREMENT(src, src_stride); + } +} +OIL_DEFINE_IMPL(conv_f64_s16_table, conv_f64_s16); + +static void +conv_f32_s16_table(float *dest, int dest_stride, short *src, + int src_stride, int n) +{ static float ints_high[256]; static float 
ints_low[256]; static int init = 0; @@ -69,6 +106,7 @@ conv_f64_s16_table(double *dest, int dest_stride, short *src, OIL_INCREMENT(src, src_stride); } } -OIL_DEFINE_IMPL(conv_f64_s16_table, conv_f64_s16); +OIL_DEFINE_IMPL(conv_f32_s16_table, conv_f32_s16); + diff --git a/liboil/simdpack/diffsquaresum_f64.c b/liboil/simdpack/diffsquaresum_f64.c index 633f90a..f503ccf 100644 --- a/liboil/simdpack/diffsquaresum_f64.c +++ b/liboil/simdpack/diffsquaresum_f64.c @@ -47,7 +47,8 @@ diffsquaresum_f64_ref(double *dest, double *src1, int sstr1, double *src2, int i; for(i=0;i<n;i++){ - x = OIL_OFFSET(src1, i*sstr1) - OIL_OFFSET(src2, i*sstr2); + x = OIL_GET(src1, i*sstr1, double) - + OIL_GET(src2, i*sstr2, double); x = x*x; tmp = sum; sum += x; @@ -68,7 +69,8 @@ diffsquaresum_f64_i10_simple(double *dest, double *src1, int sstr1, double *src2 int i; for(i=0;i<n;i++){ - x = OIL_OFFSET(src1, i*sstr1) - OIL_OFFSET(src2, i*sstr2); + x = OIL_GET(src1, i*sstr1, double) - + OIL_GET(src2, i*sstr2, double); x = x*x; sum += x; } @@ -78,6 +80,25 @@ diffsquaresum_f64_i10_simple(double *dest, double *src1, int sstr1, double *src2 OIL_DEFINE_IMPL (diffsquaresum_f64_i10_simple, diffsquaresum_f64); static void +diffsquaresum_f64_i10_fast(double *dest, double *src1, int sstr1, double *src2, + int sstr2, int n) +{ + double sum0 = 0; + double x; + + while(n>0){ + x = *src1 - *src2; + sum0 += x * x; + OIL_INCREMENT (src1, sstr1); + OIL_INCREMENT (src2, sstr2); + n--; + } + + *dest = sum0; +} +OIL_DEFINE_IMPL (diffsquaresum_f64_i10_fast, diffsquaresum_f64); + +static void diffsquaresum_f64_i10_unroll2(double *dest, double *src1, int sstr1, double *src2, int sstr2, int n) { |