summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Schleef <ds@schleef.org>2005-01-08 06:01:10 +0000
committerDavid Schleef <ds@schleef.org>2005-01-08 06:01:10 +0000
commit4d44fde5765f6ad78053a29a1f514b2077cfd60a (patch)
tree964b4f47f28ecf07a821940daa6d4a4be5923f2e
parent9e7d1e7c99c7d238cf2d87db12321349d5f4aeb5 (diff)
downloadliboil-4d44fde5765f6ad78053a29a1f514b2077cfd60a.tar.gz
* README: add some stuff to read
* configure.ac: check for ieee754.h header * liboil/colorspace/argb_paint.c: (argb_paint_u8_ref), (argb_paint_u8_fast): change algorithm to be more accurate * liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx): same * liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow), (conv_s32_f32_3dnow): make these implementations actually work * liboil/conv/conv_bitstuff.c: compile fixes * liboil/conv/conv_misc.c: (conv_f64_s16_table), (conv_f32_s16_table): add a f32 impl * liboil/simdpack/diffsquaresum_f64.c: (diffsquaresum_f64_ref), (diffsquaresum_f64_i10_simple), (diffsquaresum_f64_i10_fast): fix implementation
-rw-r--r--ChangeLog16
-rw-r--r--README52
-rw-r--r--configure.ac3
-rw-r--r--liboil/colorspace/argb_paint.c20
-rw-r--r--liboil/colorspace/argb_paint_i386.c16
-rw-r--r--liboil/conv/conv_3dnow.c43
-rw-r--r--liboil/conv/conv_bitstuff.c21
-rw-r--r--liboil/conv/conv_misc.c40
-rw-r--r--liboil/simdpack/diffsquaresum_f64.c25
9 files changed, 182 insertions, 54 deletions
diff --git a/ChangeLog b/ChangeLog
index 3b5555b..af63a24 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2005-01-07 David Schleef <ds@schleef.org>
+
+ * README: add some stuff to read
+ * configure.ac: check for ieee754.h header
+ * liboil/colorspace/argb_paint.c: (argb_paint_u8_ref),
+ (argb_paint_u8_fast): change algorithm to be more accurate
+ * liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx): same
+ * liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow),
+ (conv_s32_f32_3dnow): make these implementations actually work
+ * liboil/conv/conv_bitstuff.c: compile fixes
+ * liboil/conv/conv_misc.c: (conv_f64_s16_table),
+ (conv_f32_s16_table): add a f32 impl
+ * liboil/simdpack/diffsquaresum_f64.c: (diffsquaresum_f64_ref),
+ (diffsquaresum_f64_i10_simple), (diffsquaresum_f64_i10_fast):
+ fix implementation
+
2005-01-05 David Schleef <ds@schleef.org>
* testsuite/stride.c: (main), (hist): new test
diff --git a/README b/README
index e3c5f80..12cfe35 100644
--- a/README
+++ b/README
@@ -1,15 +1,53 @@
-Ideas:
+ABI warning
+===========
-implementations should be flagged as to whether or not they contain
-assembly, and also whether or not they are the product of the
-alternate optimization. This way, testing code can automatically
-flag functions where the alternate optimization is faster than the
-C or asm code, indicating that the compiler output should be turned
-into assembly.
+In general, liboil is not ABI stable. However, portions of liboil
+are guaranteed to be stable through the 0.3.x series, and using
+a compatibility library, through the 0.4.x series as well. This
+policy is designed to provide almost all applications with ABI
+stability for the symbols most used in liboil.
+Any symbols declared by including <liboil/liboil.h> follow this
+ABI policy. This includes all liboil function classes as well
+as a limited set of core functions, such as oil_init().
+
+ABI Implementation
+==================
+
+The liboil-0.3.x series creates the shared library liboil-0.3.so.
+Applications that use liboil are linked against this library, and
+will load liboil-0.3.so at runtime.
+
+The liboil-0.4.x series will create two shared libraries,
+liboil-0.4.so and a compatibility library liboil-0.3.so that
+implements functionality removed in the 0.3->0.4 transition.
+The 0.3 shared library will be binary compatible with the
+liboil-0.3.x releases. Thus, applications compiled with a
+liboil-0.3.x release will continue to function after the shared
+library is upgraded to a 0.4.x release. Applications compiled
+with 0.4.x will use liboil-0.4.so directly.
+
+Distributions can use this compatibility library in order to
+smooth transitions between liboil major releases. For example,
+consider the case where AppA depends on libB and libC, and both
+libraries use liboil. When liboil-0.4 is released, the distro
+can start using it immediately, and AppA will continue to work
+correctly. Then, at a convenient time, libB and libC can
+independently be recompiled using liboil-0.4.x, and the packages
+will no longer depend on liboil-0.3.so.
+
+The traditional way of dealing with these changes is to
+migrate all packages to the new library as quickly as possible.
+This is unduly cumbersome.
+
+
+
+Random Ideas
+============
+
Why is trans8x8_f64 so slow on powerpc (compared to trans8x8_u16)?
diff --git a/configure.ac b/configure.ac
index 7429ca8..e63a86e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,6 +86,9 @@ AC_CHECK_LIB(m, lrint,
AC_CHECK_LIB(m, lrintf,
AC_DEFINE(HAVE_LRINTF, 1, [Define if lrintf() is available]))
+AC_CHECK_HEADER(ieee754.h,
+ AC_DEFINE(HAVE_IEEE754_H, 1, [Define if ieee754.h exists]))
+
AS_COMPILER_FLAG(-Wall, LIBOIL_CFLAGS="$LIBOIL_CFLAGS -Wall")
if test "x$LIBOIL_CVS" = "xyes"
then
diff --git a/liboil/colorspace/argb_paint.c b/liboil/colorspace/argb_paint.c
index 6c7598a..7bbb168 100644
--- a/liboil/colorspace/argb_paint.c
+++ b/liboil/colorspace/argb_paint.c
@@ -35,8 +35,8 @@
OIL_DEFINE_CLASS (argb_paint_u8, "uint8_t *i_4xn, uint8_t *s1_4, uint8_t *s2_n, int n");
-#define imult(a,b) (((a)*(b) + (((a)*(b)) >> 8))>>8)
-#define apply(a,b,c) (imult(a,255-c) + imult(b,c))
+#define div255(x) (((x + 128) + ((x + 128)>>8))>>8)
+#define blend(x,y,a) div255((x)*(a) + (y)*(255-(a)))
static void
argb_paint_u8_ref (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
@@ -44,10 +44,10 @@ argb_paint_u8_ref (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
int i;
for(i=0;i<n;i++){
- dest[0] = apply(dest[0],color[0],alpha[0]);
- dest[1] = apply(dest[1],color[1],alpha[0]);
- dest[2] = apply(dest[2],color[2],alpha[0]);
- dest[3] = apply(dest[3],color[3],alpha[0]);
+ dest[0] = blend(color[0],dest[0],alpha[0]);
+ dest[1] = blend(color[1],dest[1],alpha[0]);
+ dest[2] = blend(color[2],dest[2],alpha[0]);
+ dest[3] = blend(color[3],dest[3],alpha[0]);
dest+=4;
alpha++;
}
@@ -68,10 +68,10 @@ argb_paint_u8_fast (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
dest[2] = color[2];
dest[3] = color[3];
} else {
- dest[0] = apply(dest[0],color[0],alpha[0]);
- dest[1] = apply(dest[1],color[1],alpha[0]);
- dest[2] = apply(dest[2],color[2],alpha[0]);
- dest[3] = apply(dest[3],color[3],alpha[0]);
+ dest[0] = blend(color[0],dest[0],alpha[0]);
+ dest[1] = blend(color[1],dest[1],alpha[0]);
+ dest[2] = blend(color[2],dest[2],alpha[0]);
+ dest[3] = blend(color[3],dest[3],alpha[0]);
}
dest+=4;
alpha++;
diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c
index f6785a4..3753589 100644
--- a/liboil/colorspace/argb_paint_i386.c
+++ b/liboil/colorspace/argb_paint_i386.c
@@ -35,11 +35,12 @@
OIL_DECLARE_CLASS (argb_paint_u8);
-#define imult(a,b) (((a)*(b) + (((a)*(b)) >> 8))>>8)
-#define apply(a,b,c) (imult(a,255-c) + imult(b,c))
+#define div255(x) (((x + 128) + ((x + 128)>>8))>>8)
+#define blend(x,y,a) div255((x)*(a) + (y)*(255-(a)))
static short constants[][4] = {
- { 255, 255, 255, 255 }
+ { 255, 255, 255, 255 },
+ { 128, 128, 128, 128 }
};
static void
@@ -55,10 +56,10 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
" movq (%0), %%mm1\n"
" punpcklbw %%mm0, %%mm1\n"
" movb (%2), %%al\n"
- " je 3f\n"
+ " je 4f\n"
" cmpl $255, %1\n"
" jne 2f\n"
- " movd %%mm3, (%0)\n"
+ " movq %%mm3, %%mm2\n"
" jmp 3f\n"
"2:\n"
" movd %1, %%mm2\n"
@@ -68,13 +69,15 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
" pmullw %%mm1, %%mm4\n"
" pmullw %%mm3, %%mm2\n"
" paddw %%mm4, %%mm2\n"
+ " paddw 8(%4), %%mm2\n"
" movq %%mm2, %%mm1\n"
" psrlw $8, %%mm1\n"
" paddw %%mm1, %%mm2\n"
" psrlw $8, %%mm2\n"
+ "3: \n"
" packuswb %%mm0, %%mm2\n"
" movd %%mm2, (%0)\n"
- "3:\n"
+ "4:\n"
" add $4, %0\n"
" add $1, %2\n"
" decl %3\n"
@@ -85,3 +88,4 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
}
OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
+
diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c
index 54da52f..af7df49 100644
--- a/liboil/conv/conv_3dnow.c
+++ b/liboil/conv/conv_3dnow.c
@@ -34,45 +34,52 @@
#ifdef __GNUC__
/* suboptimal */
-static void conv_f32_s32_3dnow(float *dst, int dst_stride, int32_t *src, int src_stride, int n)
+static void
+conv_f32_s16_3dnow(float *dst, int dst_stride, int16_t *src, int src_stride,
+ int n)
{
int i;
- if (n & 1)
- *dst++ = (float) *src++;
- n /= 2;
-
for(i=0;i<n;i++){
asm volatile(
- " pi2fd 0(%0), %%mm0 \n"
- " movq %%mm0, 0(%1) \n"
+ " xor %%eax, %%eax \n"
+ " movw 0(%0), %%eax \n"
+ " movd %%eax, %%mm0 \n"
+ " pi2fd 0(%0), %%mm0 \n"
+ " movd %%mm0, 0(%1) \n"
:
- : "a" (src), "c" (dst)
- : "mm0"
+ : "r" (src), "r" (dst)
+ : "eax", "mm0"
);
dst = OIL_OFFSET(dst, dst_stride);
src = OIL_OFFSET(src, src_stride);
}
asm volatile ("emms");
}
-OIL_DEFINE_IMPL_FULL(conv_f32_s32_3dnow, conv_f32_s32,
+OIL_DEFINE_IMPL_FULL(conv_f32_s16_3dnow, conv_f32_s16,
OIL_IMPL_FLAG_3DNOW);
/* suboptimal */
-static void conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride, int n)
+static void
+conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride,
+ int n)
{
int i;
-
- if (n & 1)
- *src++ = (int32_t) *dst++;
- n /= 2;
+ const float constants[][2] = {
+ { -0.5, -0.5 },
+ { -1.0, -1.0 }
+ };
for(i=0;i<n;i++){
asm volatile(
- " pf2id 0(%0), %%mm0 \n"
- " movq %%mm0, 0(%1) \n"
+ " movq 0(%0), %%mm0 \n"
+ " pfadd 0(%2), %%mm0 \n"
+ " pf2id %%mm0, %%mm1 \n"
+ " pfcmpgt 0(%2), %%mm0 \n"
+ " paddd %%mm0, %%mm1 \n"
+ " movd %%mm1, 0(%1) \n"
:
- : "a" (src), "c" (dst)
+ : "r" (src), "r" (dst), "r" (constants)
: "mm0"
);
dst = OIL_OFFSET(dst, dst_stride);
diff --git a/liboil/conv/conv_bitstuff.c b/liboil/conv/conv_bitstuff.c
index 82c7b9d..a535bfe 100644
--- a/liboil/conv/conv_bitstuff.c
+++ b/liboil/conv/conv_bitstuff.c
@@ -29,6 +29,7 @@
#include "config.h"
#endif
#include <liboil/liboilfunction.h>
+#include <conv.h>
#ifdef HAVE_IEEE754_H
@@ -49,7 +50,7 @@ static void conv_f32_u8_bitstuff(float *dst, int dest_stride, uint8_t *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f32_u8_bitstuff, conv_f32_u8_class);
+OIL_DEFINE_IMPL(conv_f32_u8_bitstuff, conv_f32_u8);
static void conv_f32_s8_bitstuff(float *dst, int dest_stride, int8_t *src,
int src_stride, int n)
@@ -66,7 +67,7 @@ static void conv_f32_s8_bitstuff(float *dst, int dest_stride, int8_t *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f32_s8_bitstuff, conv_f32_s8_class);
+OIL_DEFINE_IMPL(conv_f32_s8_bitstuff, conv_f32_s8);
static void conv_f32_u16_bitstuff(float *dst, int dest_stride, uint16_t *src,
int src_stride, int n)
@@ -83,7 +84,7 @@ static void conv_f32_u16_bitstuff(float *dst, int dest_stride, uint16_t *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f32_u16_bitstuff, conv_f32_u16_class);
+OIL_DEFINE_IMPL(conv_f32_u16_bitstuff, conv_f32_u16);
static void conv_f32_s16_bitstuff(float *dst, int dest_stride, int16_t *src,
int src_stride, int n)
@@ -100,7 +101,7 @@ static void conv_f32_s16_bitstuff(float *dst, int dest_stride, int16_t *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f32_s16_bitstuff, conv_f32_s16_class);
+OIL_DEFINE_IMPL(conv_f32_s16_bitstuff, conv_f32_s16);
#define signbit_S32(x) (((uint32_t)(x))>>31)
@@ -124,7 +125,7 @@ static void conv_s16_f32_bitstuff(int16_t *dst, int dest_stride, float *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_s16_f32_bitstuff, conv_s16_f32_class);
+OIL_DEFINE_IMPL(conv_s16_f32_bitstuff, conv_s16_f32);
#if 0
@@ -143,7 +144,7 @@ static void conv_f64_u8_bitstuff(float *dst, int dest_stride, uint8_t *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f64_u8_bitstuff, conv_f64_u8_class);
+OIL_DEFINE_IMPL(conv_f64_u8_bitstuff, conv_f64_u8);
static void conv_f64_s8_bitstuff(float *dst, int dest_stride, int8_t *src,
int src_stride, int n)
@@ -160,7 +161,7 @@ static void conv_f64_s8_bitstuff(float *dst, int dest_stride, int8_t *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f64_s8_bitstuff, conv_f64_s8_class);
+OIL_DEFINE_IMPL(conv_f64_s8_bitstuff, conv_f64_s8);
static void conv_f64_u16_bitstuff(float *dst, int dest_stride, uint16_t *src,
int src_stride, int n)
@@ -177,7 +178,7 @@ static void conv_f64_u16_bitstuff(float *dst, int dest_stride, uint16_t *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f64_u16_bitstuff, conv_f64_u16_class);
+OIL_DEFINE_IMPL(conv_f64_u16_bitstuff, conv_f64_u16);
static void conv_f64_s16_bitstuff(float *dst, int dest_stride, int16_t *src,
int src_stride, int n)
@@ -194,7 +195,7 @@ static void conv_f64_s16_bitstuff(float *dst, int dest_stride, int16_t *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f64_s16_bitstuff, conv_f64_s16_class);
+OIL_DEFINE_IMPL(conv_f64_s16_bitstuff, conv_f64_s16);
#endif
/* This implementation is slightly inaccurate */
@@ -225,7 +226,7 @@ static void conv_s16_f64_bitstuff(int16_t *dst, int dest_stride, float *src,
OIL_INCREMENT (src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_s16_f64_bitstuff, conv_s16_f64_class);
+OIL_DEFINE_IMPL(conv_s16_f64_bitstuff, conv_s16_f64);
#endif
diff --git a/liboil/conv/conv_misc.c b/liboil/conv/conv_misc.c
index 73898be..b32250e 100644
--- a/liboil/conv/conv_misc.c
+++ b/liboil/conv/conv_misc.c
@@ -38,6 +38,43 @@ static void
conv_f64_s16_table(double *dest, int dest_stride, short *src,
int src_stride, int n)
{
+ static double ints_high[256];
+ static double ints_low[256];
+ static int init = 0;
+ int i;
+ unsigned int idx;
+ if(!init){
+ for(i=0;i<256;i++){
+ ints_high[i]=256.0*((i<128)?i:i-256);
+ ints_low[i]=i;
+ }
+ init = 1;
+ }
+
+ if(n&1){
+ idx = (unsigned short)*src;
+ *dest = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
+ OIL_INCREMENT(dest, dest_stride);
+ OIL_INCREMENT(src, src_stride);
+ n-=1;
+ }
+ for(i=0;i<n;i+=2){
+ idx = (unsigned short)*src;
+ *dest = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
+ OIL_INCREMENT(dest, dest_stride);
+ OIL_INCREMENT(src, src_stride);
+ idx = (unsigned short)*src;
+ *dest = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
+ OIL_INCREMENT(dest, dest_stride);
+ OIL_INCREMENT(src, src_stride);
+ }
+}
+OIL_DEFINE_IMPL(conv_f64_s16_table, conv_f64_s16);
+
+static void
+conv_f32_s16_table(float *dest, int dest_stride, short *src,
+ int src_stride, int n)
+{
static float ints_high[256];
static float ints_low[256];
static int init = 0;
@@ -69,6 +106,7 @@ conv_f64_s16_table(double *dest, int dest_stride, short *src,
OIL_INCREMENT(src, src_stride);
}
}
-OIL_DEFINE_IMPL(conv_f64_s16_table, conv_f64_s16);
+OIL_DEFINE_IMPL(conv_f32_s16_table, conv_f32_s16);
+
diff --git a/liboil/simdpack/diffsquaresum_f64.c b/liboil/simdpack/diffsquaresum_f64.c
index 633f90a..f503ccf 100644
--- a/liboil/simdpack/diffsquaresum_f64.c
+++ b/liboil/simdpack/diffsquaresum_f64.c
@@ -47,7 +47,8 @@ diffsquaresum_f64_ref(double *dest, double *src1, int sstr1, double *src2,
int i;
for(i=0;i<n;i++){
- x = OIL_OFFSET(src1, i*sstr1) - OIL_OFFSET(src2, i*sstr2);
+ x = OIL_GET(src1, i*sstr1, double) -
+ OIL_GET(src2, i*sstr2, double);
x = x*x;
tmp = sum;
sum += x;
@@ -68,7 +69,8 @@ diffsquaresum_f64_i10_simple(double *dest, double *src1, int sstr1, double *src2
int i;
for(i=0;i<n;i++){
- x = OIL_OFFSET(src1, i*sstr1) - OIL_OFFSET(src2, i*sstr2);
+ x = OIL_GET(src1, i*sstr1, double) -
+ OIL_GET(src2, i*sstr2, double);
x = x*x;
sum += x;
}
@@ -78,6 +80,25 @@ diffsquaresum_f64_i10_simple(double *dest, double *src1, int sstr1, double *src2
OIL_DEFINE_IMPL (diffsquaresum_f64_i10_simple, diffsquaresum_f64);
static void
+diffsquaresum_f64_i10_fast(double *dest, double *src1, int sstr1, double *src2,
+ int sstr2, int n)
+{
+ double sum0 = 0;
+ double x;
+
+ while(n>0){
+ x = *src1 - *src2;
+ sum0 += x * x;
+ OIL_INCREMENT (src1, sstr1);
+ OIL_INCREMENT (src2, sstr2);
+ n--;
+ }
+
+ *dest = sum0;
+}
+OIL_DEFINE_IMPL (diffsquaresum_f64_i10_fast, diffsquaresum_f64);
+
+static void
diffsquaresum_f64_i10_unroll2(double *dest, double *src1, int sstr1, double *src2,
int sstr2, int n)
{