diff options
author | David Schleef <ds@schleef.org> | 2005-07-27 02:13:01 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-07-27 02:13:01 +0000 |
commit | 399d0b95c77cdd7874629ea834260aa8c2de07c0 (patch) | |
tree | e046fd1081c08960a8defc15addedb2ca04e4abe | |
parent | b8fc92fe89955131563c6de839fe85bd80390ee5 (diff) | |
download | liboil-399d0b95c77cdd7874629ea834260aa8c2de07c0.tar.gz |
* doc/liboil-sections.txt: Add some recent new prototypes.
* doc/tmpl/liboilfuncs-math.sgml:
* doc/tmpl/liboilfuncs-pixel.sgml:
* examples/report.c: (get_n_impls), (oil_print_class):
* liboil/Makefile.am: add math subdir
* liboil/colorspace/composite.c: (composite_in_argb_ref),
(composite_in_argb_const_src_ref),
(composite_in_argb_const_mask_ref), (composite_over_argb_ref),
(composite_over_argb_const_src_ref), (composite_add_argb_ref),
(composite_add_argb_const_src_ref), (composite_in_over_argb_ref),
(composite_in_over_argb_const_src_ref),
(composite_in_over_argb_const_mask_ref): Fix OVER operator, and
rename compose to composite everywhere.
* liboil/liboilfuncs.h: new math functions
* liboil/liboiltest.c: (oil_test_new): change size of test array
back to 100, because 1000 is wrong and slow
* liboil/math/Makefile.am: new math code
* liboil/math/math.c:
-rw-r--r-- | ChangeLog | 21 | ||||
-rw-r--r-- | doc/liboil-sections.txt | 31 | ||||
-rw-r--r-- | doc/tmpl/liboilfuncs-math.sgml | 128 | ||||
-rw-r--r-- | doc/tmpl/liboilfuncs-pixel.sgml | 200 | ||||
-rw-r--r-- | examples/report.c | 18 | ||||
-rw-r--r-- | liboil/Makefile.am | 3 | ||||
-rw-r--r-- | liboil/colorspace/composite.c | 117 | ||||
-rw-r--r-- | liboil/liboilfuncs.h | 54 | ||||
-rw-r--r-- | liboil/liboiltest.c | 2 | ||||
-rw-r--r-- | liboil/math/Makefile.am | 33 | ||||
-rw-r--r-- | liboil/math/math.c | 273 |
11 files changed, 820 insertions, 60 deletions
@@ -1,5 +1,26 @@ 2005-07-26 David Schleef <ds@schleef.org> + * doc/liboil-sections.txt: Add some recent new prototypes. + * doc/tmpl/liboilfuncs-math.sgml: + * doc/tmpl/liboilfuncs-pixel.sgml: + * examples/report.c: (get_n_impls), (oil_print_class): + * liboil/Makefile.am: add math subdir + * liboil/colorspace/composite.c: (composite_in_argb_ref), + (composite_in_argb_const_src_ref), + (composite_in_argb_const_mask_ref), (composite_over_argb_ref), + (composite_over_argb_const_src_ref), (composite_add_argb_ref), + (composite_add_argb_const_src_ref), (composite_in_over_argb_ref), + (composite_in_over_argb_const_src_ref), + (composite_in_over_argb_const_mask_ref): Fix OVER operator, and + rename compose to composite everywhere. + * liboil/liboilfuncs.h: new math functions + * liboil/liboiltest.c: (oil_test_new): change size of test array + back to 100, because 1000 is wrong and slow + * liboil/math/Makefile.am: new math code + * liboil/math/math.c: + +2005-07-26 David Schleef <ds@schleef.org> + * CVS_HAS_MOVED: Revert last change in new repository * autogen.sh: * configure.ac: diff --git a/doc/liboil-sections.txt b/doc/liboil-sections.txt index 752b99c..035da0d 100644 --- a/doc/liboil-sections.txt +++ b/doc/liboil-sections.txt @@ -366,6 +366,18 @@ oil_vectoradd_u32 oil_vectoradd_u8 oil_squaresum_f64 oil_sum_f64 +oil_add_f32 +oil_divide_f32 +oil_floor_f32 +oil_inverse_f32 +oil_maximum_f32 +oil_minimum_f32 +oil_multiply_f32 +oil_negative_f32 +oil_scalaradd_f32_ns +oil_scalarmultiply_f32_ns +oil_sign_f32 +oil_subtract_f32 </SECTION> <SECTION> @@ -402,11 +414,30 @@ oil_idct8x8theora_s16 <FILE>liboilfuncs-pixel</FILE> oil_argb_paint_u8 oil_ayuv2argb_u8 +oil_ayuv2uyvy +oil_ayuv2yuyv +oil_ayuv2yvyu +oil_composite_add_argb +oil_composite_add_argb_const_src +oil_composite_in_argb +oil_composite_in_argb_const_mask +oil_composite_in_argb_const_src +oil_composite_in_over_argb +oil_composite_in_over_argb_const_mask +oil_composite_in_over_argb_const_src +oil_composite_over_argb +oil_composite_over_argb_const_src +oil_merge_linear_argb +oil_resample_linear_argb +oil_resample_linear_u8 oil_rgb2bgr oil_rgb2rgba oil_yuv2rgbx_sub2_u8 oil_yuv2rgbx_sub4_u8 oil_yuv2rgbx_u8 +oil_uyvy2ayuv +oil_yuyv2ayuv +oil_yvyu2ayuv </SECTION> <SECTION> diff --git a/doc/tmpl/liboilfuncs-math.sgml b/doc/tmpl/liboilfuncs-math.sgml index 57c4704..a8d307d 100644 --- a/doc/tmpl/liboilfuncs-math.sgml +++ b/doc/tmpl/liboilfuncs-math.sgml @@ -476,3 +476,131 @@ the destination array. @n: +<!-- ##### FUNCTION oil_add_f32 ##### --> +<para> + +</para> + +@d: +@s1: +@s2: +@n: + + +<!-- ##### FUNCTION oil_divide_f32 ##### --> +<para> + +</para> + +@d: +@s1: +@s2: +@n: + + +<!-- ##### FUNCTION oil_floor_f32 ##### --> +<para> + +</para> + +@d: +@s: +@n: + + +<!-- ##### FUNCTION oil_inverse_f32 ##### --> +<para> + +</para> + +@d: +@s: +@n: + + +<!-- ##### FUNCTION oil_maximum_f32 ##### --> +<para> + +</para> + +@d: +@s1: +@s2: +@n: + + +<!-- ##### FUNCTION oil_minimum_f32 ##### --> +<para> + +</para> + +@d: +@s1: +@s2: +@n: + + +<!-- ##### FUNCTION oil_multiply_f32 ##### --> +<para> + +</para> + +@d: +@s1: +@s2: +@n: + + +<!-- ##### FUNCTION oil_negative_f32 ##### --> +<para> + +</para> + +@d: +@s: +@n: + + +<!-- ##### FUNCTION oil_scalaradd_f32_ns ##### --> +<para> + +</para> + +@d: +@s1: +@s2_1: +@n: + + +<!-- ##### FUNCTION oil_scalarmultiply_f32_ns ##### --> +<para> + +</para> + +@d: +@s1: +@s2_1: +@n: + + +<!-- ##### FUNCTION oil_sign_f32 ##### --> +<para> + +</para> + +@d: +@s: +@n: + + +<!-- ##### FUNCTION oil_subtract_f32 ##### --> +<para> + +</para> + +@d: +@s1: +@s2: +@n: + + diff --git a/doc/tmpl/liboilfuncs-pixel.sgml b/doc/tmpl/liboilfuncs-pixel.sgml index 422d2cc..a409a06 100644 --- a/doc/tmpl/liboilfuncs-pixel.sgml +++ b/doc/tmpl/liboilfuncs-pixel.sgml @@ -35,6 +35,176 @@ Converts an array of AYUV pixels to ARGB. @n: +<!-- ##### FUNCTION oil_ayuv2uyvy ##### --> +<para> + +</para> + +@d_n: +@s_n: +@n: + + +<!-- ##### FUNCTION oil_ayuv2yuyv ##### --> +<para> + +</para> + +@d_n: +@s_n: +@n: + + +<!-- ##### FUNCTION oil_ayuv2yvyu ##### --> +<para> + +</para> + +@d_n: +@s_n: +@n: + + +<!-- ##### FUNCTION oil_composite_add_argb ##### --> +<para> + +</para> + +@i_n: +@s1_n: +@n: + + +<!-- ##### FUNCTION oil_composite_add_argb_const_src ##### --> +<para> + +</para> + +@i_n: +@s1_1: +@n: + + +<!-- ##### FUNCTION oil_composite_in_argb ##### --> +<para> + +</para> + +@d_n: +@s1_n: +@s2_n: +@n: + + +<!-- ##### FUNCTION oil_composite_in_argb_const_mask ##### --> +<para> + +</para> + +@d_n: +@s1_n: +@s2_1: +@n: + + +<!-- ##### FUNCTION oil_composite_in_argb_const_src ##### --> +<para> + +</para> + +@d_n: +@s1_1: +@s2_n: +@n: + + +<!-- ##### FUNCTION oil_composite_in_over_argb ##### --> +<para> + +</para> + +@i_n: +@s1_n: +@s2_n: +@n: + + +<!-- ##### FUNCTION oil_composite_in_over_argb_const_mask ##### --> +<para> + +</para> + +@i_n: +@s1_n: +@s2_1: +@n: + + +<!-- ##### FUNCTION oil_composite_in_over_argb_const_src ##### --> +<para> + +</para> + +@i_n: +@s1_1: +@s2_n: +@n: + + +<!-- ##### FUNCTION oil_composite_over_argb ##### --> +<para> + +</para> + +@i_n: +@s1_n: +@n: + + +<!-- ##### FUNCTION oil_composite_over_argb_const_src ##### --> +<para> + +</para> + +@i_n: +@s1_1: +@n: + + +<!-- ##### FUNCTION oil_merge_linear_argb ##### --> +<para> + +</para> + +@d_n: +@s_n: +@s2_n: +@s3_1: +@n: + + +<!-- ##### FUNCTION oil_resample_linear_argb ##### --> +<para> + +</para> + +@d_n: +@s_2xn: +@n: +@i_2: + + +<!-- ##### FUNCTION oil_resample_linear_u8 ##### --> +<para> + +</para> + +@d_n: +@s_2xn: +@n: +@i_2: + + <!-- ##### FUNCTION oil_rgb2bgr ##### --> <para> @@ -91,3 +261,33 @@ Converts an array of AYUV pixels to ARGB. @n: +<!-- ##### FUNCTION oil_uyvy2ayuv ##### --> +<para> + +</para> + +@d_n: +@s_n: +@n: + + +<!-- ##### FUNCTION oil_yuyv2ayuv ##### --> +<para> + +</para> + +@d_n: +@s_n: +@n: + + +<!-- ##### FUNCTION oil_yvyu2ayuv ##### --> +<para> + +</para> + +@d_n: +@s_n: +@n: + + diff --git a/examples/report.c b/examples/report.c index 108b563..a9fe8bd 100644 --- a/examples/report.c +++ b/examples/report.c @@ -143,11 +143,27 @@ impl_compare (const void *a, const void *b) } #endif +static int +get_n_impls(OilFunctionClass *klass) +{ + OilFunctionImpl *impl; + int n; + + n = 0; + for(impl=klass->first_impl;impl;impl=impl->next)n++; + + return n; +} + static void oil_print_class (OilFunctionClass *klass, int verbose) { + int n_impls; + + n_impls = get_n_impls(klass); - printf ("%-20s %-10g %-10g %-10.3g %s %s\n", klass->name, + printf ("%-20s,%d,%-10g,%-10g,%-10.3g,%s,%s\n", klass->name, + n_impls, klass->reference_impl->profile_ave, klass->chosen_impl->profile_ave, klass->reference_impl->profile_ave/klass->chosen_impl->profile_ave, diff --git a/liboil/Makefile.am b/liboil/Makefile.am index c6e0590..ebb98c2 100644 --- a/liboil/Makefile.am +++ b/liboil/Makefile.am @@ -1,7 +1,7 @@ pkgincludedir = $(includedir)/liboil-@LIBOIL_MAJORMINOR@/liboil -SUBDIRS = colorspace conv copy dct jpeg md5 motovec simdpack sse utf8 +SUBDIRS = colorspace conv copy dct jpeg math md5 motovec simdpack sse utf8 lib_LTLIBRARIES = liboiltmp1.la liboil-@LIBOIL_MAJORMINOR@.la @@ -26,6 +26,7 @@ liboilfunctions_la_LIBADD = \ copy/libcopy.la \ dct/libdct.la \ jpeg/libjpeg.la \ + math/libmath.la \ md5/libmd5.la \ motovec/libmotovec.la \ simdpack/libsimdpack.la \ diff --git a/liboil/colorspace/composite.c b/liboil/colorspace/composite.c index c0d1dd4..6cb2ef5 100644 --- a/liboil/colorspace/composite.c +++ b/liboil/colorspace/composite.c @@ -32,18 +32,21 @@ #include <liboil/liboil.h> #include <liboil/liboilfunction.h> +#define CLAMP_0(x) (((x)<0)?0:(x)) +#define CLAMP_255(x) (((x)>255)?255:(x)) +#define CLAMP(x) CLAMP_255(CLAMP_0(x)) -#define ARGB(a,r,g,b) (((a)<<24) | ((r)<<16) | ((g)<<8) | (b)) +#define ARGB(a,r,g,b) ((CLAMP(a)<<24) | (CLAMP(r)<<16) | (CLAMP(g)<<8) | (CLAMP(b)<<0)) #define ARGB_A(color) (((color)>>24)&0xff) #define ARGB_R(color) (((color)>>16)&0xff) #define ARGB_G(color) (((color)>>8)&0xff) #define ARGB_B(color) (((color)>>0)&0xff) -#define div255(x) (((x + 128) + ((x + 128)>>8))>>8) +#define div255(x) (((x) + ((x)>>8))>>8) -#define COMPOSE_OVER(d,s,m) ((d) + (s) - div255((d)*(a))) -#define COMPOSE_ADD(d,s) ((d) + (s)) -#define COMPOSE_IN(s,m) (div255((s)*(m))) +#define COMPOSITE_OVER(d,s,m) ((d) + (s) - div255((d)*(m))) +#define COMPOSITE_ADD(d,s) ((d) + (s)) +#define COMPOSITE_IN(s,m) (div255((s)*(m))) OIL_DEFINE_CLASS (composite_in_argb, "uint32_t *d_n, uint32_t *s1_n, uint8_t *s2_n, int n"); @@ -73,10 +76,10 @@ composite_in_argb_ref (uint32_t *dest, uint32_t *src, uint8_t *mask, int n) for(i=0;i<n;i++){ dest[i] = ARGB( - COMPOSE_IN(ARGB_A(src[i]), mask[i]), - COMPOSE_IN(ARGB_R(src[i]), mask[i]), - COMPOSE_IN(ARGB_G(src[i]), mask[i]), - COMPOSE_IN(ARGB_B(src[i]), mask[i])); + COMPOSITE_IN(ARGB_A(src[i]), mask[i]), + COMPOSITE_IN(ARGB_R(src[i]), mask[i]), + COMPOSITE_IN(ARGB_G(src[i]), mask[i]), + COMPOSITE_IN(ARGB_B(src[i]), mask[i])); } } OIL_DEFINE_IMPL_REF (composite_in_argb_ref, composite_in_argb); @@ -88,10 +91,10 @@ composite_in_argb_const_src_ref (uint32_t *dest, uint32_t *src, uint8_t *mask, i for(i=0;i<n;i++){ dest[i] = ARGB( - COMPOSE_IN(ARGB_A(src[0]), mask[i]), - COMPOSE_IN(ARGB_R(src[0]), mask[i]), - COMPOSE_IN(ARGB_G(src[0]), mask[i]), - COMPOSE_IN(ARGB_B(src[0]), mask[i])); + COMPOSITE_IN(ARGB_A(src[0]), mask[i]), + COMPOSITE_IN(ARGB_R(src[0]), mask[i]), + COMPOSITE_IN(ARGB_G(src[0]), mask[i]), + COMPOSITE_IN(ARGB_B(src[0]), mask[i])); } } OIL_DEFINE_IMPL_REF (composite_in_argb_const_src_ref, composite_in_argb_const_src); @@ -103,10 +106,10 @@ composite_in_argb_const_mask_ref (uint32_t *dest, uint32_t *src, uint8_t *mask, for(i=0;i<n;i++){ dest[i] = ARGB( - COMPOSE_IN(ARGB_A(src[i]), mask[0]), - COMPOSE_IN(ARGB_R(src[i]), mask[0]), - COMPOSE_IN(ARGB_G(src[i]), mask[0]), - COMPOSE_IN(ARGB_B(src[i]), mask[0])); + COMPOSITE_IN(ARGB_A(src[i]), mask[0]), + COMPOSITE_IN(ARGB_R(src[i]), mask[0]), + COMPOSITE_IN(ARGB_G(src[i]), mask[0]), + COMPOSITE_IN(ARGB_B(src[i]), mask[0])); } } OIL_DEFINE_IMPL_REF (composite_in_argb_const_mask_ref, composite_in_argb_const_mask); @@ -120,10 +123,10 @@ composite_over_argb_ref (uint32_t *dest, uint32_t *src, int n) for(i=0;i<n;i++){ a = ARGB_A(src[i]); dest[i] = ARGB( - COMPOSE_OVER(ARGB_A(dest[i]),ARGB_A(src[i]),a), - COMPOSE_OVER(ARGB_R(dest[i]),ARGB_R(src[i]),a), - COMPOSE_OVER(ARGB_G(dest[i]),ARGB_G(src[i]),a), - COMPOSE_OVER(ARGB_B(dest[i]),ARGB_B(src[i]),a)); + COMPOSITE_OVER(ARGB_A(dest[i]),ARGB_A(src[i]),a), + COMPOSITE_OVER(ARGB_R(dest[i]),ARGB_R(src[i]),a), + COMPOSITE_OVER(ARGB_G(dest[i]),ARGB_G(src[i]),a), + COMPOSITE_OVER(ARGB_B(dest[i]),ARGB_B(src[i]),a)); } } @@ -138,10 +141,10 @@ composite_over_argb_const_src_ref (uint32_t *dest, uint32_t *src, int n) a = ARGB_A(src[0]); for(i=0;i<n;i++){ dest[i] = ARGB( - COMPOSE_OVER(ARGB_A(dest[i]),ARGB_A(src[0]),a), - COMPOSE_OVER(ARGB_R(dest[i]),ARGB_R(src[0]),a), - COMPOSE_OVER(ARGB_G(dest[i]),ARGB_G(src[0]),a), - COMPOSE_OVER(ARGB_B(dest[i]),ARGB_B(src[0]),a)); + COMPOSITE_OVER(ARGB_A(dest[i]),ARGB_A(src[0]),a), + COMPOSITE_OVER(ARGB_R(dest[i]),ARGB_R(src[0]),a), + COMPOSITE_OVER(ARGB_G(dest[i]),ARGB_G(src[0]),a), + COMPOSITE_OVER(ARGB_B(dest[i]),ARGB_B(src[0]),a)); } } @@ -154,10 +157,10 @@ composite_add_argb_ref (uint32_t *dest, uint32_t *src, int n) for(i=0;i<n;i++){ dest[i] = ARGB( - COMPOSE_ADD(ARGB_A(dest[i]),ARGB_A(src[i])), - COMPOSE_ADD(ARGB_R(dest[i]),ARGB_R(src[i])), - COMPOSE_ADD(ARGB_G(dest[i]),ARGB_G(src[i])), - COMPOSE_ADD(ARGB_B(dest[i]),ARGB_B(src[i]))); + COMPOSITE_ADD(ARGB_A(dest[i]),ARGB_A(src[i])), + COMPOSITE_ADD(ARGB_R(dest[i]),ARGB_R(src[i])), + COMPOSITE_ADD(ARGB_G(dest[i]),ARGB_G(src[i])), + COMPOSITE_ADD(ARGB_B(dest[i]),ARGB_B(src[i]))); } } @@ -170,10 +173,10 @@ composite_add_argb_const_src_ref (uint32_t *dest, uint32_t *src, int n) for(i=0;i<n;i++){ dest[i] = ARGB( - COMPOSE_ADD(ARGB_A(dest[i]),ARGB_A(src[0])), - COMPOSE_ADD(ARGB_R(dest[i]),ARGB_R(src[0])), - COMPOSE_ADD(ARGB_G(dest[i]),ARGB_G(src[0])), - COMPOSE_ADD(ARGB_B(dest[i]),ARGB_B(src[0]))); + COMPOSITE_ADD(ARGB_A(dest[i]),ARGB_A(src[0])), + COMPOSITE_ADD(ARGB_R(dest[i]),ARGB_R(src[0])), + COMPOSITE_ADD(ARGB_G(dest[i]),ARGB_G(src[0])), + COMPOSITE_ADD(ARGB_B(dest[i]),ARGB_B(src[0]))); } } @@ -188,16 +191,16 @@ composite_in_over_argb_ref (uint32_t *dest, uint32_t *src, uint8_t *mask, int n) for(i=0;i<n;i++){ color = ARGB( - COMPOSE_IN(ARGB_A(src[i]), mask[i]), - COMPOSE_IN(ARGB_R(src[i]), mask[i]), - COMPOSE_IN(ARGB_G(src[i]), mask[i]), - COMPOSE_IN(ARGB_B(src[i]), mask[i])); + COMPOSITE_IN(ARGB_A(src[i]), mask[i]), + COMPOSITE_IN(ARGB_R(src[i]), mask[i]), + COMPOSITE_IN(ARGB_G(src[i]), mask[i]), + COMPOSITE_IN(ARGB_B(src[i]), mask[i])); a = ARGB_A(color); dest[i] = ARGB( - COMPOSE_OVER(ARGB_A(dest[i]),ARGB_A(color),a), - COMPOSE_OVER(ARGB_R(dest[i]),ARGB_R(color),a), - COMPOSE_OVER(ARGB_G(dest[i]),ARGB_G(color),a), - COMPOSE_OVER(ARGB_B(dest[i]),ARGB_B(color),a)); + COMPOSITE_OVER(ARGB_A(dest[i]),ARGB_A(color),a), + COMPOSITE_OVER(ARGB_R(dest[i]),ARGB_R(color),a), + COMPOSITE_OVER(ARGB_G(dest[i]),ARGB_G(color),a), + COMPOSITE_OVER(ARGB_B(dest[i]),ARGB_B(color),a)); } } @@ -212,16 +215,16 @@ composite_in_over_argb_const_src_ref (uint32_t *dest, uint32_t *src, uint8_t *ma for(i=0;i<n;i++){ color = ARGB( - COMPOSE_IN(ARGB_A(src[0]), mask[i]), - COMPOSE_IN(ARGB_R(src[0]), mask[i]), - COMPOSE_IN(ARGB_G(src[0]), mask[i]), - COMPOSE_IN(ARGB_B(src[0]), mask[i])); + COMPOSITE_IN(ARGB_A(src[0]), mask[i]), + COMPOSITE_IN(ARGB_R(src[0]), mask[i]), + COMPOSITE_IN(ARGB_G(src[0]), mask[i]), + COMPOSITE_IN(ARGB_B(src[0]), mask[i])); a = ARGB_A(color); dest[i] = ARGB( - COMPOSE_OVER(ARGB_A(dest[i]),ARGB_A(color),a), - COMPOSE_OVER(ARGB_R(dest[i]),ARGB_R(color),a), - COMPOSE_OVER(ARGB_G(dest[i]),ARGB_G(color),a), - COMPOSE_OVER(ARGB_B(dest[i]),ARGB_B(color),a)); + COMPOSITE_OVER(ARGB_A(dest[i]),ARGB_A(color),a), + COMPOSITE_OVER(ARGB_R(dest[i]),ARGB_R(color),a), + COMPOSITE_OVER(ARGB_G(dest[i]),ARGB_G(color),a), + COMPOSITE_OVER(ARGB_B(dest[i]),ARGB_B(color),a)); } } @@ -236,16 +239,16 @@ composite_in_over_argb_const_mask_ref (uint32_t *dest, uint32_t *src, uint8_t *m for(i=0;i<n;i++){ color = ARGB( - COMPOSE_IN(ARGB_A(src[i]), mask[0]), - COMPOSE_IN(ARGB_R(src[i]), mask[0]), - COMPOSE_IN(ARGB_G(src[i]), mask[0]), - COMPOSE_IN(ARGB_B(src[i]), mask[0])); + COMPOSITE_IN(ARGB_A(src[i]), mask[0]), + COMPOSITE_IN(ARGB_R(src[i]), mask[0]), + COMPOSITE_IN(ARGB_G(src[i]), mask[0]), + COMPOSITE_IN(ARGB_B(src[i]), mask[0])); a = ARGB_A(color); dest[i] = ARGB( - COMPOSE_OVER(ARGB_A(dest[i]),ARGB_A(color),a), - COMPOSE_OVER(ARGB_R(dest[i]),ARGB_R(color),a), - COMPOSE_OVER(ARGB_G(dest[i]),ARGB_G(color),a), - COMPOSE_OVER(ARGB_B(dest[i]),ARGB_B(color),a)); + COMPOSITE_OVER(ARGB_A(dest[i]),ARGB_A(color),a), + COMPOSITE_OVER(ARGB_R(dest[i]),ARGB_R(color),a), + COMPOSITE_OVER(ARGB_G(dest[i]),ARGB_G(color),a), + COMPOSITE_OVER(ARGB_B(dest[i]),ARGB_B(color),a)); } } diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h index 07aaee6..2c55d12 100644 --- a/liboil/liboilfuncs.h +++ b/liboil/liboilfuncs.h @@ -51,6 +51,9 @@ typedef void (*_oil_type_abs_u32_s32)(uint32_t * dest, int dstr, const int32_t * extern OilFunctionClass *oil_function_class_ptr_abs_u8_s8; typedef void (*_oil_type_abs_u8_s8)(uint8_t * dest, int dstr, const int8_t * src, int sstr, int n); #define oil_abs_u8_s8 ((_oil_type_abs_u8_s8)(*(void **)oil_function_class_ptr_abs_u8_s8)) +extern OilFunctionClass *oil_function_class_ptr_add_f32; +typedef void (*_oil_type_add_f32)(float * d, const float * s1, const float * s2, int n); +#define oil_add_f32 ((_oil_type_add_f32)(*(void **)oil_function_class_ptr_add_f32)) extern OilFunctionClass *oil_function_class_ptr_argb_paint_u8; typedef void (*_oil_type_argb_paint_u8)(uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s2_n, int n); #define oil_argb_paint_u8 ((_oil_type_argb_paint_u8)(*(void **)oil_function_class_ptr_argb_paint_u8)) @@ -60,6 +63,15 @@ typedef void (*_oil_type_average2_u8)(uint8_t * dest, int dstr, const uint8_t * extern OilFunctionClass *oil_function_class_ptr_ayuv2argb_u8; typedef void (*_oil_type_ayuv2argb_u8)(uint8_t * d_4xn, const uint8_t * s_4xn, int n); #define oil_ayuv2argb_u8 ((_oil_type_ayuv2argb_u8)(*(void **)oil_function_class_ptr_ayuv2argb_u8)) +extern OilFunctionClass *oil_function_class_ptr_ayuv2uyvy; +typedef void (*_oil_type_ayuv2uyvy)(uint32_t * d_n, const uint32_t * s_n, int n); +#define oil_ayuv2uyvy ((_oil_type_ayuv2uyvy)(*(void **)oil_function_class_ptr_ayuv2uyvy)) +extern OilFunctionClass *oil_function_class_ptr_ayuv2yuyv; +typedef void (*_oil_type_ayuv2yuyv)(uint32_t * d_n, const uint32_t * s_n, int n); +#define oil_ayuv2yuyv ((_oil_type_ayuv2yuyv)(*(void **)oil_function_class_ptr_ayuv2yuyv)) +extern OilFunctionClass *oil_function_class_ptr_ayuv2yvyu; +typedef void (*_oil_type_ayuv2yvyu)(uint32_t * d_n, const uint32_t * s_n, int n); +#define oil_ayuv2yvyu ((_oil_type_ayuv2yvyu)(*(void **)oil_function_class_ptr_ayuv2yvyu)) extern OilFunctionClass *oil_function_class_ptr_clip_f32; typedef void (*_oil_type_clip_f32)(float * dest, int dstr, const float * src, int sstr, int n, const float * s2_1, const float * s3_1); #define oil_clip_f32 ((_oil_type_clip_f32)(*(void **)oil_function_class_ptr_clip_f32)) @@ -393,6 +405,9 @@ typedef void (*_oil_type_dequantize8x8_s16)(int16_t * d_8x8, int dstr, const int extern OilFunctionClass *oil_function_class_ptr_diffsquaresum_f64; typedef void (*_oil_type_diffsquaresum_f64)(double * d_1, const double * src1, int sstr1, const double * src2, int sstr2, int n); #define oil_diffsquaresum_f64 ((_oil_type_diffsquaresum_f64)(*(void **)oil_function_class_ptr_diffsquaresum_f64)) +extern OilFunctionClass *oil_function_class_ptr_divide_f32; +typedef void (*_oil_type_divide_f32)(float * d, const float * s1, const float * s2, int n); +#define oil_divide_f32 ((_oil_type_divide_f32)(*(void **)oil_function_class_ptr_divide_f32)) extern OilFunctionClass *oil_function_class_ptr_fdct8_f64; typedef void (*_oil_type_fdct8_f64)(double * d_8, const double * s_8, int dstr, int sstr); #define oil_fdct8_f64 ((_oil_type_fdct8_f64)(*(void **)oil_function_class_ptr_fdct8_f64)) @@ -402,6 +417,9 @@ typedef void (*_oil_type_fdct8x8_f64)(double * d_8x8, int dstr, const double * s extern OilFunctionClass *oil_function_class_ptr_fdct8x8s_s16; typedef void (*_oil_type_fdct8x8s_s16)(int16_t * d_8x8, int ds, const int16_t * s_8x8, int ss); #define oil_fdct8x8s_s16 ((_oil_type_fdct8x8s_s16)(*(void **)oil_function_class_ptr_fdct8x8s_s16)) +extern OilFunctionClass *oil_function_class_ptr_floor_f32; +typedef void (*_oil_type_floor_f32)(float * d, const float * s, int n); +#define oil_floor_f32 ((_oil_type_floor_f32)(*(void **)oil_function_class_ptr_floor_f32)) extern OilFunctionClass *oil_function_class_ptr_idct8_f64; typedef void (*_oil_type_idct8_f64)(double * d_8, int dstr, const double * s_8, int sstr); #define oil_idct8_f64 ((_oil_type_idct8_f64)(*(void **)oil_function_class_ptr_idct8_f64)) @@ -432,6 +450,12 @@ typedef void (*_oil_type_imdct32_f32)(float * d_32, const float * s_32); extern OilFunctionClass *oil_function_class_ptr_imdct36_f64; typedef void (*_oil_type_imdct36_f64)(double * d_36, const double * s_18); #define oil_imdct36_f64 ((_oil_type_imdct36_f64)(*(void **)oil_function_class_ptr_imdct36_f64)) +extern OilFunctionClass *oil_function_class_ptr_inverse_f32; +typedef void (*_oil_type_inverse_f32)(float * d, const float * s, int n); +#define oil_inverse_f32 ((_oil_type_inverse_f32)(*(void **)oil_function_class_ptr_inverse_f32)) +extern OilFunctionClass *oil_function_class_ptr_maximum_f32; +typedef void (*_oil_type_maximum_f32)(float * d, const float * s1, const float * s2, int n); +#define oil_maximum_f32 ((_oil_type_maximum_f32)(*(void **)oil_function_class_ptr_maximum_f32)) extern OilFunctionClass *oil_function_class_ptr_md5; typedef void (*_oil_type_md5)(uint32_t * i_4, const uint32_t * s_16); #define oil_md5 ((_oil_type_md5)(*(void **)oil_function_class_ptr_md5)) @@ -444,18 +468,27 @@ typedef void (*_oil_type_mdct36_f64)(double * d_18, const double * s_36); extern OilFunctionClass *oil_function_class_ptr_merge_linear_argb; typedef void (*_oil_type_merge_linear_argb)(uint32_t * d_n, const uint32_t * s_n, const uint32_t * s2_n, const uint32_t * s3_1, int n); #define oil_merge_linear_argb ((_oil_type_merge_linear_argb)(*(void **)oil_function_class_ptr_merge_linear_argb)) +extern OilFunctionClass *oil_function_class_ptr_minimum_f32; +typedef void (*_oil_type_minimum_f32)(float * d, const float * s1, const float * s2, int n); +#define oil_minimum_f32 ((_oil_type_minimum_f32)(*(void **)oil_function_class_ptr_minimum_f32)) extern OilFunctionClass *oil_function_class_ptr_mix_u8; typedef void (*_oil_type_mix_u8)(uint8_t * dest, const uint8_t * src1, const uint8_t * src2, const uint8_t * src3, int n); #define oil_mix_u8 ((_oil_type_mix_u8)(*(void **)oil_function_class_ptr_mix_u8)) extern OilFunctionClass *oil_function_class_ptr_mult8x8_s16; typedef void (*_oil_type_mult8x8_s16)(int16_t * d_8x8, const int16_t * s1_8x8, const int16_t * s2_8x8, int ds, int ss1, int ss2); #define oil_mult8x8_s16 ((_oil_type_mult8x8_s16)(*(void **)oil_function_class_ptr_mult8x8_s16)) +extern OilFunctionClass *oil_function_class_ptr_multiply_f32; +typedef void (*_oil_type_multiply_f32)(float * d, const float * s1, const float * s2, int n); +#define oil_multiply_f32 ((_oil_type_multiply_f32)(*(void **)oil_function_class_ptr_multiply_f32)) extern OilFunctionClass *oil_function_class_ptr_multsum_f32; typedef void (*_oil_type_multsum_f32)(float * dest, const float * src1, int sstr1, const float * src2, int sstr2, int n); #define oil_multsum_f32 ((_oil_type_multsum_f32)(*(void **)oil_function_class_ptr_multsum_f32)) extern OilFunctionClass *oil_function_class_ptr_multsum_f64; typedef void (*_oil_type_multsum_f64)(double * dest, const double * src1, int sstr1, const double * src2, int sstr2, int n); #define oil_multsum_f64 ((_oil_type_multsum_f64)(*(void **)oil_function_class_ptr_multsum_f64)) +extern OilFunctionClass *oil_function_class_ptr_negative_f32; +typedef void (*_oil_type_negative_f32)(float * d, const float * s, int n); +#define oil_negative_f32 ((_oil_type_negative_f32)(*(void **)oil_function_class_ptr_negative_f32)) extern OilFunctionClass *oil_function_class_ptr_null; typedef void (*_oil_type_null)(void); #define oil_null ((_oil_type_null)(*(void **)oil_function_class_ptr_null)) @@ -513,6 +546,9 @@ typedef void (*_oil_type_sad8x8_u8)(uint32_t * d_1, const uint8_t * s1_8x8, int extern OilFunctionClass *oil_function_class_ptr_scalaradd_f32; typedef void (*_oil_type_scalaradd_f32)(float * dest, int dstr, const float * src, int sstr, const float * s2_1, int n); #define oil_scalaradd_f32 ((_oil_type_scalaradd_f32)(*(void **)oil_function_class_ptr_scalaradd_f32)) +extern OilFunctionClass *oil_function_class_ptr_scalaradd_f32_ns; +typedef void (*_oil_type_scalaradd_f32_ns)(float * d, const float * s1, const float * s2_1, int n); +#define oil_scalaradd_f32_ns ((_oil_type_scalaradd_f32_ns)(*(void **)oil_function_class_ptr_scalaradd_f32_ns)) extern OilFunctionClass *oil_function_class_ptr_scalaradd_f64; typedef void (*_oil_type_scalaradd_f64)(double * dest, int dstr, const double * src, int sstr, const double * s2_1, int n); #define oil_scalaradd_f64 ((_oil_type_scalaradd_f64)(*(void **)oil_function_class_ptr_scalaradd_f64)) @@ -558,6 +594,9 @@ typedef void (*_oil_type_scalarmult_u32)(uint32_t * dest, int dstr, const uint32 extern OilFunctionClass *oil_function_class_ptr_scalarmult_u8; typedef void (*_oil_type_scalarmult_u8)(uint8_t * dest, int dstr, const uint8_t * src, int sstr, const uint8_t * s2_1, int n); #define oil_scalarmult_u8 ((_oil_type_scalarmult_u8)(*(void **)oil_function_class_ptr_scalarmult_u8)) +extern OilFunctionClass *oil_function_class_ptr_scalarmultiply_f32_ns; +typedef void (*_oil_type_scalarmultiply_f32_ns)(float * d, const float * s1, const float * s2_1, int n); +#define oil_scalarmultiply_f32_ns ((_oil_type_scalarmultiply_f32_ns)(*(void **)oil_function_class_ptr_scalarmultiply_f32_ns)) extern OilFunctionClass *oil_function_class_ptr_scaleconv_f32_s16; typedef void (*_oil_type_scaleconv_f32_s16)(float * dest, const int16_t * src, int n, const double * s2_1, const double * s3_1); #define oil_scaleconv_f32_s16 ((_oil_type_scaleconv_f32_s16)(*(void **)oil_function_class_ptr_scaleconv_f32_s16)) @@ -633,6 +672,9 @@ typedef void (*_oil_type_scaleconv_u8_f64)(uint8_t * dest, const double * src, i extern OilFunctionClass *oil_function_class_ptr_scanlinescale2_u8; typedef void (*_oil_type_scanlinescale2_u8)(uint8_t * dest, const uint8_t * src, int n); #define oil_scanlinescale2_u8 ((_oil_type_scanlinescale2_u8)(*(void **)oil_function_class_ptr_scanlinescale2_u8)) +extern OilFunctionClass *oil_function_class_ptr_sign_f32; +typedef void (*_oil_type_sign_f32)(float * d, const float * s, int n); +#define oil_sign_f32 ((_oil_type_sign_f32)(*(void **)oil_function_class_ptr_sign_f32)) extern OilFunctionClass *oil_function_class_ptr_sincos_f64; typedef void (*_oil_type_sincos_f64)(double * dest1, double * dest2, int n, const double * s1_1, const double * s2_1); #define oil_sincos_f64 ((_oil_type_sincos_f64)(*(void **)oil_function_class_ptr_sincos_f64)) @@ -651,6 +693,9 @@ typedef void (*_oil_type_splat_u8_ns)(uint8_t * dest, const uint8_t * s1_1, int extern OilFunctionClass *oil_function_class_ptr_squaresum_f64; typedef void (*_oil_type_squaresum_f64)(double * dest, const double * src, int n); #define oil_squaresum_f64 ((_oil_type_squaresum_f64)(*(void **)oil_function_class_ptr_squaresum_f64)) +extern OilFunctionClass *oil_function_class_ptr_subtract_f32; +typedef void (*_oil_type_subtract_f32)(float * d, const float * s1, const float * s2, int n); +#define oil_subtract_f32 ((_oil_type_subtract_f32)(*(void **)oil_function_class_ptr_subtract_f32)) extern OilFunctionClass *oil_function_class_ptr_sum_f64; typedef void (*_oil_type_sum_f64)(double * dest, const double * src, int sstr, int n); #define oil_sum_f64 ((_oil_type_sum_f64)(*(void **)oil_function_class_ptr_sum_f64)) @@ -675,6 +720,9 @@ typedef void (*_oil_type_unzigzag8x8_s16)(int16_t * d_8x8, int ds, const int16_t extern OilFunctionClass *oil_function_class_ptr_utf8_validate; typedef void (*_oil_type_utf8_validate)(int32_t * d_1, const uint8_t * s, int n); #define oil_utf8_validate ((_oil_type_utf8_validate)(*(void **)oil_function_class_ptr_utf8_validate)) +extern OilFunctionClass *oil_function_class_ptr_uyvy2ayuv; +typedef void (*_oil_type_uyvy2ayuv)(uint32_t * d_n, const uint32_t * s_n, int n); +#define oil_uyvy2ayuv ((_oil_type_uyvy2ayuv)(*(void **)oil_function_class_ptr_uyvy2ayuv)) extern OilFunctionClass *oil_function_class_ptr_vectoradd_f32; typedef void (*_oil_type_vectoradd_f32)(float * dest, int dstr, const float * src1, int sstr1, const float * src2, int sstr2, int n, const float * s3_1, const float * s4_1); #define oil_vectoradd_f32 ((_oil_type_vectoradd_f32)(*(void **)oil_function_class_ptr_vectoradd_f32)) @@ -708,6 +756,12 @@ typedef void (*_oil_type_yuv2rgbx_sub4_u8)(uint8_t * d_4xn, const uint8_t * src1 extern OilFunctionClass *oil_function_class_ptr_yuv2rgbx_u8; typedef void (*_oil_type_yuv2rgbx_u8)(uint8_t * d_4xn, const uint8_t * src1, const uint8_t * src2, const uint8_t * src3, int n); #define oil_yuv2rgbx_u8 ((_oil_type_yuv2rgbx_u8)(*(void **)oil_function_class_ptr_yuv2rgbx_u8)) +extern OilFunctionClass *oil_function_class_ptr_yuyv2ayuv; +typedef void (*_oil_type_yuyv2ayuv)(uint32_t * d_n, const uint32_t * s_n, int n); +#define oil_yuyv2ayuv ((_oil_type_yuyv2ayuv)(*(void **)oil_function_class_ptr_yuyv2ayuv)) +extern OilFunctionClass *oil_function_class_ptr_yvyu2ayuv; +typedef void (*_oil_type_yvyu2ayuv)(uint32_t * d_n, const uint32_t * s_n, int n); +#define oil_yvyu2ayuv ((_oil_type_yvyu2ayuv)(*(void **)oil_function_class_ptr_yvyu2ayuv)) extern OilFunctionClass *oil_function_class_ptr_zigzag8x8_s16; typedef void (*_oil_type_zigzag8x8_s16)(int16_t * d_8x8, int ds, const int16_t * s_8x8, int ss); #define oil_zigzag8x8_s16 ((_oil_type_zigzag8x8_s16)(*(void **)oil_function_class_ptr_zigzag8x8_s16)) diff --git a/liboil/liboiltest.c b/liboil/liboiltest.c index 2f5c430..82dbb70 100644 --- a/liboil/liboiltest.c +++ b/liboil/liboiltest.c @@ -79,7 +79,7 @@ oil_test_new (OilFunctionClass *klass) } test->iterations = 10; - test->n = 1000; + test->n = 100; test->m = 100; return test; diff --git a/liboil/math/Makefile.am b/liboil/math/Makefile.am new file mode 100644 index 0000000..9922c62 --- /dev/null +++ b/liboil/math/Makefile.am @@ -0,0 +1,33 @@ + +if USE_ALT_OPT +opt_libs = libmath_opt1.la +else +opt_libs = +endif +noinst_LTLIBRARIES = libmath.la $(opt_libs) + +c_sources = \ + math.c + +if HAVE_CPU_POWERPC +powerpc_sources = +else +powerpc_sources = +endif + +if HAVE_CPU_I386 +i386_sources = +else +i386_sources = +endif + +libmath_la_SOURCES = \ + $(c_sources) \ + $(powerpc_sources) \ + $(i386_sources) +libmath_la_LIBADD = $(opt_libs) +libmath_la_CFLAGS = $(LIBOIL_CFLAGS) + +libmath_opt1_la_SOURCES = $(c_sources) +libmath_opt1_la_CFLAGS = $(LIBOIL_CFLAGS) $(LIBOIL_OPT_CFLAGS) + diff --git a/liboil/math/math.c b/liboil/math/math.c new file mode 100644 index 0000000..817c395 --- /dev/null +++ b/liboil/math/math.c @@ -0,0 +1,273 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2005 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <math.h> + +#include <liboil/liboil.h> +#include <liboil/liboilfunction.h> +#include <liboil/liboiltest.h> + + +OIL_DEFINE_CLASS (add_f32, "float *d, float *s1, float *s2, int n"); +OIL_DEFINE_CLASS (subtract_f32, "float *d, float *s1, float *s2, int n"); +OIL_DEFINE_CLASS (multiply_f32, "float *d, float *s1, float *s2, int n"); +OIL_DEFINE_CLASS (divide_f32, "float *d, float *s1, float *s2, int n"); +OIL_DEFINE_CLASS (minimum_f32, "float *d, float *s1, float *s2, int n"); +OIL_DEFINE_CLASS (maximum_f32, "float *d, float *s1, float *s2, int n"); + +OIL_DEFINE_CLASS (negative_f32, "float *d, float *s, int n"); +OIL_DEFINE_CLASS (inverse_f32, "float *d, float *s, int n"); +OIL_DEFINE_CLASS (sign_f32, "float *d, float *s, int n"); +OIL_DEFINE_CLASS (floor_f32, "float *d, float *s, int n"); + +OIL_DEFINE_CLASS (scalaradd_f32_ns, "float *d, float *s1, float *s2_1, int n"); +OIL_DEFINE_CLASS (scalarmultiply_f32_ns, "float *d, float *s1, float *s2_1, int n"); + +static void +add_f32_ref (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = src1[i] + src2[i]; + } +} +OIL_DEFINE_IMPL_REF (add_f32_ref, add_f32); + +static void +subtract_f32_ref (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = src1[i] - src2[i]; + } +} +OIL_DEFINE_IMPL_REF (subtract_f32_ref, subtract_f32); + +static void +multiply_f32_ref (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = src1[i] * src2[i]; + } +} +OIL_DEFINE_IMPL_REF (multiply_f32_ref, multiply_f32); + +static void +divide_f32_ref (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = src1[i] / src2[i]; + } +} +OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32); + +static void +minimum_f32_ref (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i]; + } +} +OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32); + +static void +maximum_f32_ref (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i]; + } +} +OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32); + +static void +negative_f32_ref (float *dest, float *src1, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = -src1[i]; + } +} +OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32); + +static void +inverse_f32_ref (float *dest, float *src1, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = 1.0/src1[i]; + } +} +OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32); + +static void +sign_f32_ref (float *dest, float *src1, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = (src1[i] < 0) ? -src1[i] : src1[i]; + } +} +OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32); + +static void +floor_f32_ref (float *dest, float *src1, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = floor(src1[i]); + } +} +OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32); + + + +static void +add_f32_unroll4 (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<(n&(~0x3));i+=4){ + dest[i+0] = src1[i+0] + src2[i+0]; + dest[i+1] = src1[i+1] + src2[i+1]; + dest[i+2] = src1[i+2] + src2[i+2]; + dest[i+3] = src1[i+3] + src2[i+3]; + } + for(;i<n;i++){ + dest[i] = src1[i] + src2[i]; + } +} +OIL_DEFINE_IMPL (add_f32_unroll4, add_f32); + +static void +add_f32_unroll4b (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<(n&(~0x3));i+=4){ + *dest++ = *src1++ + *src2++; + *dest++ = *src1++ + *src2++; + *dest++ = *src1++ + *src2++; + *dest++ = *src1++ + *src2++; + } + for(;i<n;i++){ + *dest++ = *src1++ + *src2++; + } +} +OIL_DEFINE_IMPL (add_f32_unroll4b, add_f32); + +static void +scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = src1[i] + src2[0]; + } +} +OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns); + +static void +scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[i] = src1[i] * src2[0]; + } +} +OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns); + + +static void +multiply_f32_unroll4 (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<(n&(~0x3));i+=4){ + dest[i+0] = src1[i+0] * src2[i+0]; + dest[i+1] = src1[i+1] * src2[i+1]; + dest[i+2] = src1[i+2] * src2[i+2]; + dest[i+3] = src1[i+3] * src2[i+3]; + } + for(;i<n;i++){ + dest[i] = src1[i] * src2[i]; + } +} +OIL_DEFINE_IMPL (multiply_f32_unroll4, multiply_f32); + +static void +scalaradd_f32_ns_unroll4 (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<(n&(~0x3));i+=4){ + dest[i+0] = src1[i+0] + src2[0]; + dest[i+1] = src1[i+1] + src2[0]; + dest[i+2] = src1[i+2] + src2[0]; + dest[i+3] = src1[i+3] + src2[0]; + } + for(;i<n;i++){ + dest[i] = src1[i] + src2[0]; + } +} +OIL_DEFINE_IMPL (scalaradd_f32_ns_unroll4, scalaradd_f32_ns); + +static void +scalarmultiply_f32_ns_unroll4 (float *dest, float *src1, float *src2, int n) +{ + int i; + + for(i=0;i<(n&(~0x3));i+=4){ + dest[i+0] = src1[i+0] * src2[0]; + dest[i+1] = src1[i+1] * src2[0]; + dest[i+2] = src1[i+2] * src2[0]; + dest[i+3] = src1[i+3] * src2[0]; + } + for(;i<n;i++){ + dest[i] = src1[i] * src2[0]; + } +} +OIL_DEFINE_IMPL (scalarmultiply_f32_ns_unroll4, scalarmultiply_f32_ns); + |