diff options
-rw-r--r-- | COPYING | 34 | ||||
-rw-r--r-- | ChangeLog | 43 | ||||
-rw-r--r-- | configure.ac | 42 | ||||
-rw-r--r-- | doc/liboil-sections.txt | 3 | ||||
-rw-r--r-- | doc/tmpl/liboil-unused.sgml | 21 | ||||
-rw-r--r-- | doc/tmpl/liboilimpl-unstable.sgml | 12 | ||||
-rw-r--r-- | examples/Makefile.am | 6 | ||||
-rw-r--r-- | examples/oil-inspect.c | 4 | ||||
-rw-r--r-- | examples/oil-mt19937.c | 214 | ||||
-rw-r--r-- | liboil/Makefile.am | 70 | ||||
-rw-r--r-- | liboil/i386/Makefile.am | 1 | ||||
-rw-r--r-- | liboil/i386/mt19937.c | 301 | ||||
-rw-r--r-- | liboil/liboilclasses.h | 1 | ||||
-rw-r--r-- | liboil/liboilfuncs-04.h | 1 | ||||
-rw-r--r-- | liboil/liboilfuncs.h | 3 | ||||
-rw-r--r-- | liboil/liboilfunction.h | 68 | ||||
-rw-r--r-- | liboil/liboilrandom.c | 46 | ||||
-rw-r--r-- | liboil/liboilrandom.h | 4 | ||||
-rw-r--r-- | liboil/liboiltest.c | 2 | ||||
-rw-r--r-- | liboil/liboiltrampolines.c | 10 | ||||
-rw-r--r-- | liboil/motovec/Makefile.am | 11 | ||||
-rw-r--r-- | liboil/ref/Makefile.am | 1 | ||||
-rw-r--r-- | liboil/ref/mt19937ar.c | 93 | ||||
-rw-r--r-- | license_block | 2 | ||||
-rw-r--r-- | m4/as-host-defines.m4 | 33 | ||||
-rw-r--r-- | testsuite/Makefile.am | 2 | ||||
-rw-r--r-- | testsuite/instruction/Makefile.am | 6 | ||||
-rw-r--r-- | testsuite/mmx_engine.c | 221 |
28 files changed, 1095 insertions, 160 deletions
@@ -56,3 +56,37 @@ following license: INABILITY TO USE THE SOFTWARE. Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. + +The source code implementing the Mersenne Twister algorithm is +subject to the following license: + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + @@ -1,3 +1,46 @@ +2006-01-30 David Schleef <ds@schleef.org> + + * configure.ac: + * m4/as-host-defines.m4: + * liboil/Makefile.am: + * liboil/motovec/Makefile.am: + * doc/liboil-sections.txt: + * doc/tmpl/liboil-unused.sgml: + * doc/tmpl/liboilimpl-unstable.sgml: + * examples/oil-inspect.c: + * testsuite/instruction/Makefile.am: + Rewrite macros/defines for cpu selection. HAVE_${arch} is + the define for cpu architecture, HAVE_GCC_ASM is the define + for GCC inline assembly. + * liboil/liboilfunction.h: + Make implementation flags all part of the same enum, making + flags unique across all architectures. This makes it easier + to turn flags into feature names platform-independently. + + * liboil/liboilclasses.h: update + * liboil/liboilfuncs-04.h: + * liboil/liboilfuncs.h: + * liboil/liboiltrampolines.c: + + * liboil/liboilrandom.c: Fix documentation. + * liboil/liboiltest.c: same + * liboil/liboilrandom.h: remove config.h + + * license_block: new year + + * COPYING: Add copyright for mt19937 + * examples/Makefile.am: + * examples/oil-mt19937.c: + * liboil/ref/Makefile.am: + * liboil/ref/mt19937ar.c: + * liboil/i386/Makefile.am: + * liboil/i386/mt19937.c: + Add mt19937 and example. + + * testsuite/Makefile.am: + * testsuite/mmx_engine.c: + A little thingy for testing the features of an MMX engine. + 2006-01-28 David Schleef <ds@schleef.org> * liboil/i386/composite_i386.c: diff --git a/configure.ac b/configure.ac index 2eca618..4e7909a 100644 --- a/configure.ac +++ b/configure.ac @@ -90,44 +90,22 @@ fi GTK_DOC_CHECK([1.0]) -AS_GCC_INLINE_ASSEMBLY([HAVE_GCC_INLINE_ASSEMBLY=yes], - [HAVE_GCC_INLINE_ASSEMBLY=no]) -if test x$HAVE_GCC_INLINE_ASSEMBLY = xyes ; then - AC_DEFINE(HAVE_GCC_INLINE_ASSEMBLY, 1, - [Define if compiler supports gcc inline assembly for powerpc]) -fi -AM_CONDITIONAL(HAVE_GCC_INLINE_ASSEMBLY, - test "x$HAVE_GCC_INLINE_ASSEMBLY" = "xyes") - -dnl Determine CPU -if test "x$HAVE_GCC_INLINE_ASSEMBLY" = xyes ; then - case "x${host_cpu}" in - xi?86 | k?) - HAVE_GCC_I386=yes - AC_DEFINE(HAVE_GCC_I386, 1, [Define if compiler supports gcc inline assembly for i386]) - ;; - xx86_64|xamd64) - HAVE_GCC_AMD64=yes - AC_DEFINE(HAVE_GCC_AMD64, 1, [Define if compiler supports gcc inline assembly for amd64]) - ;; - xpowerpc) - HAVE_GCC_POWERPC=yes - AC_DEFINE(HAVE_GCC_POWERPC, 1, [Define if compiler supports gcc inline assembly for powerpc]) - ;; - esac +AS_GCC_INLINE_ASSEMBLY([HAVE_GCC_ASM=yes], [HAVE_GCC_ASM=no]) +if test x$HAVE_GCC_ASM = xyes ; then + AC_DEFINE(HAVE_GCC_ASM, 1, + [Define if compiler supports gcc inline assembly]) fi +AM_CONDITIONAL(HAVE_GCC_ASM, test "x$HAVE_GCC_ASM" = "xyes") -AM_CONDITIONAL(HAVE_GCC_I386, test "x$HAVE_GCC_I386" = "xyes") -AM_CONDITIONAL(HAVE_GCC_AMD64, test "x$HAVE_GCC_AMD64" = "xyes") -AM_CONDITIONAL(HAVE_GCC_POWERPC, test "x$HAVE_GCC_POWERPC" = "xyes") +AS_HOST_DEFINES() AS_GCC_ASM_POWERPC_FPU([HAVE_GCC_ASM_POWERPC_FPU=yes], [HAVE_GCC_ASM_POWERPC_FPU=no]) -if test x$HAVE_GCC_ASM_POWERPC = xyes ; then - AC_DEFINE(HAVE_GCC_ASM_POWERPC, 1, +if test x$HAVE_GCC_ASM_POWERPC_FPU = xyes ; then + AC_DEFINE(HAVE_GCC_ASM_POWERPC_FPU, 1, [Define if compiler supports PowerPC FPU instructions]) fi -AM_CONDITIONAL(HAVE_GCC_ASM_POWERPC, +AM_CONDITIONAL(HAVE_GCC_ASM_POWERPC_FPU, test "x$HAVE_GCC_ASM_POWERPC" = "xyes") AC_C_BIGENDIAN @@ -166,7 +144,7 @@ if test "x$LIBOIL_CVS" = "xyes" then AS_COMPILER_FLAG(-Werror, LIBOIL_CFLAGS="$LIBOIL_CFLAGS -Werror") fi -if test x$HAVE_GCC_POWERPC = xyes ; then +if test x$HAVE_GCC_ASM = xyes -a x$HAVE_POWERPC = xyes ; then AS_COMPILER_FLAG(["-Wa,-mregnames"], [LIBOIL_CFLAGS="$LIBOIL_CFLAGS -Wa,-mregnames"], true) diff --git a/doc/liboil-sections.txt b/doc/liboil-sections.txt index b9f9309..2c8605b 100644 --- a/doc/liboil-sections.txt +++ b/doc/liboil-sections.txt @@ -57,8 +57,6 @@ OilFunctionImpl oil_impl_get_by_index oil_impl_is_runnable OilImplFlag -OilImplFlagI386 -OilImplFlagPowerPC OIL_CPU_FLAG_MASK OIL_DEFINE_IMPL_FULL OIL_DEFINE_IMPL @@ -72,7 +70,6 @@ OIL_DEFINE_IMPL_DEPENDS OilProfile OIL_PROFILE_HIST_LENGTH oil_profile_stamp -oil_profile_stamp_gtod oil_profile_init oil_profile_stop_handle oil_profile_get_ave_std diff --git a/doc/tmpl/liboil-unused.sgml b/doc/tmpl/liboil-unused.sgml index c687bf2..a851ecc 100644 --- a/doc/tmpl/liboil-unused.sgml +++ b/doc/tmpl/liboil-unused.sgml @@ -131,6 +131,27 @@ </para> +<!-- ##### ENUM OilImplFlagI386 ##### --> +<para> + +</para> + +@OIL_IMPL_FLAG_CMOV: +@OIL_IMPL_FLAG_MMX: +@OIL_IMPL_FLAG_SSE: +@OIL_IMPL_FLAG_MMXEXT: +@OIL_IMPL_FLAG_SSE2: +@OIL_IMPL_FLAG_3DNOW: +@OIL_IMPL_FLAG_3DNOWEXT: +@OIL_IMPL_FLAG_SSE3: + +<!-- ##### ENUM OilImplFlagPowerPC ##### --> +<para> + +</para> + +@OIL_IMPL_FLAG_ALTIVEC: + <!-- ##### FUNCTION oil_profile_stamp_gtod ##### --> <para> diff --git a/doc/tmpl/liboilimpl-unstable.sgml b/doc/tmpl/liboilimpl-unstable.sgml index e3a07f2..e93e3c5 100644 --- a/doc/tmpl/liboilimpl-unstable.sgml +++ b/doc/tmpl/liboilimpl-unstable.sgml @@ -50,12 +50,6 @@ @OIL_IMPL_FLAG_OPT: @OIL_IMPL_FLAG_ASM: @OIL_IMPL_FLAG_DISABLED: - -<!-- ##### ENUM OilImplFlagI386 ##### --> -<para> - -</para> - @OIL_IMPL_FLAG_CMOV: @OIL_IMPL_FLAG_MMX: @OIL_IMPL_FLAG_SSE: @@ -64,12 +58,6 @@ @OIL_IMPL_FLAG_3DNOW: @OIL_IMPL_FLAG_3DNOWEXT: @OIL_IMPL_FLAG_SSE3: - -<!-- ##### ENUM OilImplFlagPowerPC ##### --> -<para> - -</para> - @OIL_IMPL_FLAG_ALTIVEC: <!-- ##### MACRO OIL_CPU_FLAG_MASK ##### --> diff --git a/examples/Makefile.am b/examples/Makefile.am index c271c11..780c271 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -2,7 +2,7 @@ SUBDIRS = jpeg md5 uberopt work huffman taylor noinst_PROGRAMS = example1 oil-graph oil-inspect oil-test report memcpy-speed \ - oil-suggest printcpu + oil-suggest printcpu oil-mt19937 example1_SOURCES = example1.c @@ -37,3 +37,7 @@ printcpu_SOURCES = printcpu.c printcpu_CFLAGS = $(LIBOIL_CFLAGS) printcpu_LDADD = $(LIBOIL_LIBS) +oil_mt19937_SOURCES = oil-mt19937.c +oil_mt19937_CFLAGS = $(LIBOIL_CFLAGS) +oil_mt19937_LDADD = $(LIBOIL_LIBS) + diff --git a/examples/oil-inspect.c b/examples/oil-inspect.c index 6ab2e14..a9f13ca 100644 --- a/examples/oil-inspect.c +++ b/examples/oil-inspect.c @@ -76,7 +76,6 @@ oil_flags_to_string (unsigned int flags) ret = string_append (ret, "altopt"); if (flags & OIL_IMPL_FLAG_ASM) ret = string_append (ret, "asm"); -#ifdef HAVE_CPU_I386 if (flags & OIL_IMPL_FLAG_CMOV) ret = string_append (ret, "cmov"); if (flags & OIL_IMPL_FLAG_MMX) @@ -91,11 +90,8 @@ oil_flags_to_string (unsigned int flags) ret = string_append (ret, "3dnow"); if (flags & OIL_IMPL_FLAG_3DNOWEXT) ret = string_append (ret, "3dnowext"); -#endif -#ifdef HAVE_CPU_PPC if (flags & OIL_IMPL_FLAG_ALTIVEC) ret = string_append (ret, "altivec"); -#endif return ret; } diff --git a/examples/oil-mt19937.c b/examples/oil-mt19937.c new file mode 100644 index 0000000..8d7c527 --- /dev/null +++ b/examples/oil-mt19937.c @@ -0,0 +1,214 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2006 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +/* + Notes about the liboil version: + + This program is an adaptation of the Mersenne Twister example + program downloaded from the web site listed below. The kernel + of the generator is implemented in liboil, and the function + genrand_int32() has been replaced with a library call. Note + that the liboil function calculates an entire output array at + once instead of individually like the original. This makes it + easier to use memcpy to copy out many outputs at once. + + Notes from the original authors: + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> + +#include <liboil/liboil.h> + +/* Period parameters */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL /* constant vector a */ +#define UPPER_MASK 0x80000000UL /* most significant w-r bits */ +#define LOWER_MASK 0x7fffffffUL /* least significant r bits */ + +static uint32_t mt[N]; /* the array for the state vector */ +static uint32_t mt_outputs[N]; /* the array for the outputs */ +static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */ + +/* initializes mt[N] with a seed */ +void init_genrand(unsigned long s) +{ + mt[0]= s & 0xffffffffUL; + for (mti=1; mti<N; mti++) { + mt[mti] = + (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xffffffffUL; + /* for >32 bit machines */ + } +} + +/* initialize by an array with array-length */ +/* init_key is the array for initializing keys */ +/* key_length is its length */ +/* slight change for C++, 2004/2/26 */ +void init_by_array(unsigned long init_key[], int key_length) +{ + int i, j, k; + init_genrand(19650218UL); + i=1; j=0; + k = (N>key_length ? N : key_length); + for (; k; k--) { + mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL)) + + init_key[j] + j; /* non linear */ + mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; j++; + if (i>=N) { mt[0] = mt[N-1]; i=1; } + if (j>=key_length) j=0; + } + for (k=N-1; k; k--) { + mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL)) + - i; /* non linear */ + mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; + if (i>=N) { mt[0] = mt[N-1]; i=1; } + } + + mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ +} + +/* generates a random number on [0,0xffffffff]-interval */ +unsigned long genrand_int32(void) +{ + if (mti >= N) { /* generate N words at one time */ + oil_mt19937 (mt_outputs, mt); + mti = 0; + } + + return mt_outputs[mti++]; +} + +/* generates a random number on [0,0x7fffffff]-interval */ +long genrand_int31(void) +{ + return (long)(genrand_int32()>>1); +} + +/* generates a random number on [0,1]-real-interval */ +double genrand_real1(void) +{ + return genrand_int32()*(1.0/4294967295.0); + /* divided by 2^32-1 */ +} + +/* generates a random number on [0,1)-real-interval */ +double genrand_real2(void) +{ + return genrand_int32()*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on (0,1)-real-interval */ +double genrand_real3(void) +{ + return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on [0,1) with 53-bit resolution*/ +double genrand_res53(void) +{ + unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; + return(a*67108864.0+b)*(1.0/9007199254740992.0); +} +/* These real versions are due to Isaku Wada, 2002/01/09 added */ + +int main(void) +{ + int i; + unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4; + + oil_init(); + + init_by_array(init, length); + printf("1000 outputs of genrand_int32()\n"); + for (i=0; i<1000; i++) { + printf("%10lu ", genrand_int32()); + if (i%5==4) printf("\n"); + } + printf("\n1000 outputs of genrand_real2()\n"); + for (i=0; i<1000; i++) { + printf("%10.8f ", genrand_real2()); + if (i%5==4) printf("\n"); + } + return 0; +} diff --git a/liboil/Makefile.am b/liboil/Makefile.am index 15718f9..1affa7c 100644 --- a/liboil/Makefile.am +++ b/liboil/Makefile.am @@ -1,62 +1,50 @@ pkgincludedir = $(includedir)/liboil-@LIBOIL_MAJORMINOR@/liboil -if HAVE_GCC_I386 -subdir_i386 = i386 -libs_i386 = i386/libi386.la +DIST_SUBDIRS = amd64 3dnow c colorspace conv copy dct fb i386 jpeg math md5 mmx motovec powerpc powerpc_asm_blocks ref simdpack sse utf8 deprecated +SUBDIRS = c colorspace conv copy dct jpeg math md5 ref simdpack utf8 deprecated + +libs = + +if HAVE_I386 +if HAVE_GCC_ASM +SUBDIRS += i386 +libs += i386/libi386.la +endif +endif + if HAVE_MMX_INTRINSICS -subdir_i386 += fb -libs_i386 += fb/libfb.la -subdir_i386 += mmx -libs_i386 += mmx/libmmx.la +SUBDIRS += fb mmx +libs += fb/libfb.la mmx/libmmx.la endif if HAVE_SSE_INTRINSICS -subdir_i386 += sse -libs_i386 += sse/libsse.la +SUBDIRS += sse +libs += sse/libsse.la endif if HAVE_3DNOW_INTRINSICS #subdir_i386 += 3dnow #libs_i386 += 3dnow/lib3dnow.la endif -else -subdir_i386 = -libs_i386 = -endif -if HAVE_GCC_AMD64 -subdir_amd64 = -libs_amd64 = -if HAVE_MMX_INTRINSICS -subdir_amd64 += fb -libs_amd64 += fb/libfb.la -subdir_amd64 += mmx -libs_amd64 += mmx/libmmx.la -endif -if HAVE_SSE_INTRINSICS -subdir_amd64 += sse -libs_amd64 += sse/libsse.la +if HAVE_AMD64 +if HAVE_GCC_ASM +SUBDIRS += +libs += endif -else -subdir_amd64 = -libs_amd64 = endif -if HAVE_GCC_POWERPC -subdir_powerpc = powerpc motovec -libs_powerpc = powerpc/libpowerpc.la motovec/libmotovec.la +if HAVE_POWERPC +if HAVE_GCC_ASM +SUBDIRS += powerpc motovec +libs += powerpc/libpowerpc.la motovec/libmotovec.la if HAVE_ASM_BLOCKS -subdir_powerpc += powerpc_asm_blocks -libs_powerpc += powerpc/libpowerpc_asm_blocks.la +SUBDIRS += powerpc_asm_blocks +libs += powerpc/libpowerpc_asm_blocks.la +endif endif -else -subdir_powerpc = -libs_powerpc = endif -DIST_SUBDIRS = amd64 3dnow c colorspace conv copy dct fb i386 jpeg math md5 mmx motovec powerpc powerpc_asm_blocks ref simdpack sse utf8 deprecated -SUBDIRS = $(subdir_amd64) c colorspace conv copy dct $(subdir_i386) jpeg math md5 $(subdir_powerpc) ref simdpack utf8 deprecated - lib_LTLIBRARIES = liboil-@LIBOIL_MAJORMINOR@.la if USE_NEW_ABI lib_LTLIBRARIES += liboil-@LIBOIL_OLD_MAJORMINOR@.la @@ -92,12 +80,10 @@ liboilfunctions_la_LIBADD = \ colorspace/libcolorspace.la \ copy/libcopy.la \ dct/libdct.la \ - $(libs_amd64) \ - $(libs_i386) \ + $(libs) \ jpeg/libjpeg.la \ math/libmath.la \ md5/libmd5.la \ - $(libs_powerpc) \ ref/libref.la \ simdpack/libsimdpack.la \ utf8/libutf8.la diff --git a/liboil/i386/Makefile.am b/liboil/i386/Makefile.am index 5437785..9f67519 100644 --- a/liboil/i386/Makefile.am +++ b/liboil/i386/Makefile.am @@ -12,6 +12,7 @@ libi386_la_SOURCES = \ error8x8_i386.c \ idct8x8_i386.c \ md5_i386.c \ + mt19937.c \ mult8x8_i386.c \ recon8x8_i386.c \ resample.c \ diff --git a/liboil/i386/mt19937.c b/liboil/i386/mt19937.c new file mode 100644 index 0000000..ad389ea --- /dev/null +++ b/liboil/i386/mt19937.c @@ -0,0 +1,301 @@ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <liboil/liboilfunction.h> +#include <liboil/liboilclasses.h> + + +#define TWIST(next,pair) \ + " movd 0(%1), %%mm0 \n" \ + " movd " #next "(%1), %%mm1 \n" \ + " pand %%mm7, %%mm0 \n" \ + " pand %%mm6, %%mm1 \n" \ + " por %%mm1, %%mm0 \n" \ + " pslld $31, %%mm1 \n" \ + " psrad $31, %%mm1 \n" \ + " pand %%mm5, %%mm1 \n" \ + " psrld $1, %%mm0 \n" \ + " pxor %%mm1, %%mm0 \n" \ + " pxor " #pair "(%1), %%mm0 \n" \ + " movd %%mm0, 0(%1) \n" + +#define TWIST2(next,pair) \ + " movq 0(%1), %%mm0 \n" \ + " movq " #next "(%1), %%mm1 \n" \ + " pand %%mm7, %%mm0 \n" \ + " pand %%mm6, %%mm1 \n" \ + " por %%mm1, %%mm0 \n" \ + " pslld $31, %%mm1 \n" \ + " psrad $31, %%mm1 \n" \ + " pand %%mm5, %%mm1 \n" \ + " psrld $1, %%mm0 \n" \ + " pxor %%mm1, %%mm0 \n" \ + " pxor " #pair "(%1), %%mm0 \n" \ + " movq %%mm0, 0(%1) \n" + +#define TWIST4(next,pair) \ + " movq 0(%1), %%mm0 \n" \ + " movq 8(%1), %%mm2 \n" \ + " movq " #next "(%1), %%mm1 \n" \ + " movq (8+" #next ")(%1), %%mm3 \n" \ + " pand %%mm7, %%mm0 \n" \ + " pand %%mm7, %%mm2 \n" \ + " pand %%mm6, %%mm1 \n" \ + " pand %%mm6, %%mm3 \n" \ + " por %%mm1, %%mm0 \n" \ + " por %%mm3, %%mm2 \n" \ + " pslld $31, %%mm1 \n" \ + " pslld $31, %%mm3 \n" \ + " psrad $31, %%mm1 \n" \ + " psrad $31, %%mm3 \n" \ + " pand %%mm5, %%mm1 \n" \ + " pand %%mm5, %%mm3 \n" \ + " psrld $1, %%mm0 \n" \ + " psrld $1, %%mm2 \n" \ + " pxor %%mm1, %%mm0 \n" \ + " pxor %%mm3, %%mm2 \n" \ + " pxor " #pair "(%1), %%mm0 \n" \ + " pxor (8+" #pair ")(%1), %%mm2 \n" \ + " movq %%mm0, 0(%1) \n" \ + " movq %%mm2, 8(%1) \n" + + +static void +mt19937_i386_mmx (uint32_t *d, uint32_t *mt) +{ + asm volatile ( + " mov $0x80000000, %2\n" + " movd %2, %%mm7\n" + " mov $0x7fffffff, %2\n" + " movd %2, %%mm6\n" + " mov $0x9908b0df, %2\n" + " movd %2, %%mm5\n" + " mov $0x1, %2\n" + " movd %2, %%mm4\n" + " mov $227, %2 \n" + + "1: \n" + TWIST(4,1588) + " add $4, %1\n" + " decl %2 \n" + " jne 1b \n" + + " mov $396, %2 \n" + "2: \n" + TWIST(4,-908) + " add $4, %1\n" + " decl %2 \n" + " jne 2b \n" + + TWIST(-2492,-908) + + " add $-2492, %1 \n" + + " mov $0x9d2c5680, %2\n" + " movd %2, %%mm7\n" + " mov $0xefc60000, %2\n" + " movd %2, %%mm6\n" + " mov $624, %2 \n" + "3:\n" + " movd 0(%1), %%mm0 \n" + " movd 0(%1), %%mm1 \n" + " psrld $11, %%mm1 \n" + " pxor %%mm1, %%mm0 \n" + " movq %%mm0, %%mm1 \n" + " pslld $7, %%mm1 \n" + " pand %%mm7, %%mm1 \n" + " pxor %%mm1, %%mm0 \n" + " movq %%mm0, %%mm1 \n" + " pslld $15, %%mm1 \n" + " pand %%mm6, %%mm1 \n" + " pxor %%mm1, %%mm0 \n" + " movq %%mm0, %%mm1 \n" + " psrld $18, %%mm1 \n" + " pxor %%mm1, %%mm0 \n" + " movd %%mm0, 0(%0) \n" + " add $4, %0\n" + " add $4, %1\n" + " decl %2 \n" + " jne 3b \n" + " emms \n" + + : "+r" (d), "+r" (mt) + : "r" (0) + ); + +} +OIL_DEFINE_IMPL_FULL (mt19937_i386_mmx, mt19937, OIL_IMPL_FLAG_MMX); + +static void +mt19937_i386_mmx_2 (uint32_t *d, uint32_t *mt) +{ + asm volatile ( + " mov $0x80000000, %2\n" + " movd %2, %%mm7\n" + " pshufw $0x44, %%mm7, %%mm7 \n" + " mov $0x7fffffff, %2\n" + " movd %2, %%mm6\n" + " pshufw $0x44, %%mm6, %%mm6 \n" + " mov $0x9908b0df, %2\n" + " movd %2, %%mm5\n" + " pshufw $0x44, %%mm5, %%mm5 \n" + " mov $0x1, %2\n" + " movd %2, %%mm4\n" + " pshufw $0x44, %%mm4, %%mm4 \n" + + " mov $113, %2 \n" + "1: \n" + TWIST2(4,1588) + " add $8, %1\n" + " decl %2 \n" + " jne 1b \n" + + TWIST(4,1588) + " add $4, %1\n" + + " mov $198, %2 \n" + "2: \n" + TWIST2(4,-908) + " add $8, %1\n" + " decl %2 \n" + " jne 2b \n" + + TWIST(-2492,-908) + + " add $-2492, %1 \n" + + " mov $0x9d2c5680, %2\n" + " movd %2, %%mm7\n" + " pshufw $0x44, %%mm7, %%mm7 \n" + " mov $0xefc60000, %2\n" + " movd %2, %%mm6\n" + " pshufw $0x44, %%mm6, %%mm6 \n" + " mov $312, %2 \n" + "3:\n" + " movq 0(%1), %%mm0 \n" + " movq 0(%1), %%mm1 \n" + " psrld $11, %%mm1 \n" + " pxor %%mm1, %%mm0 \n" + " movq %%mm0, %%mm1 \n" + " pslld $7, %%mm1 \n" + " pand %%mm7, %%mm1 \n" + " pxor %%mm1, %%mm0 \n" + " movq %%mm0, %%mm1 \n" + " pslld $15, %%mm1 \n" + " pand %%mm6, %%mm1 \n" + " pxor %%mm1, %%mm0 \n" + " movq %%mm0, %%mm1 \n" + " psrld $18, %%mm1 \n" + " pxor %%mm1, %%mm0 \n" + " movq %%mm0, 0(%0) \n" + " add $8, %0\n" + " add $8, %1\n" + " decl %2 \n" + " jne 3b \n" + " emms \n" + + : "+r" (d), "+r" (mt) + : "r" (0) + ); + +} +OIL_DEFINE_IMPL_FULL (mt19937_i386_mmx_2, mt19937, OIL_IMPL_FLAG_MMX); + + +static void +mt19937_i386_mmx_3 (uint32_t *d, uint32_t *mt) +{ + asm volatile ( + " mov $0x80000000, %2\n" + " movd %2, %%mm7\n" + " pshufw $0x44, %%mm7, %%mm7 \n" + " mov $0x7fffffff, %2\n" + " movd %2, %%mm6\n" + " pshufw $0x44, %%mm6, %%mm6 \n" + " mov $0x9908b0df, %2\n" + " movd %2, %%mm5\n" + " pshufw $0x44, %%mm5, %%mm5 \n" + " mov $0x1, %2\n" + " movd %2, %%mm4\n" + + " mov $56, %2 \n" + "1: \n" + TWIST4(4,1588) + " add $16, %1\n" + " decl %2 \n" + " jne 1b \n" + + TWIST2(4,1588) + " add $8, %1\n" + + TWIST(4,1588) + " add $4, %1\n" + + " mov $99, %2 \n" + "2: \n" + TWIST4(4,-908) + " add $16, %1\n" + " decl %2 \n" + " jne 2b \n" + + TWIST(-2492,-908) + + " add $-2492, %1 \n" + + " mov $0x9d2c5680, %2\n" + " movd %2, %%mm7\n" + " pshufw $0x44, %%mm7, %%mm7 \n" + " mov $0xefc60000, %2\n" + " movd %2, %%mm6\n" + " pshufw $0x44, %%mm6, %%mm6 \n" + " mov $156, %2 \n" + "3:\n" + " movq 0(%1), %%mm0 \n" + " movq 8(%1), %%mm2 \n" + " movq 0(%1), %%mm1 \n" + " movq 8(%1), %%mm3 \n" + " psrld $11, %%mm1 \n" + " psrld $11, %%mm3 \n" + " pxor %%mm1, %%mm0 \n" + " pxor %%mm3, %%mm2 \n" + " movq %%mm0, %%mm1 \n" + " movq %%mm2, %%mm3 \n" + " pslld $7, %%mm1 \n" + " pslld $7, %%mm3 \n" + " pand %%mm7, %%mm1 \n" + " pand %%mm7, %%mm3 \n" + " pxor %%mm1, %%mm0 \n" + " pxor %%mm3, %%mm2 \n" + " movq %%mm0, %%mm1 \n" + " movq %%mm2, %%mm3 \n" + " pslld $15, %%mm1 \n" + " pslld $15, %%mm3 \n" + " pand %%mm6, %%mm1 \n" + " pand %%mm6, %%mm3 \n" + " pxor %%mm1, %%mm0 \n" + " pxor %%mm3, %%mm2 \n" + " movq %%mm0, %%mm1 \n" + " movq %%mm2, %%mm3 \n" + " psrld $18, %%mm1 \n" + " psrld $18, %%mm3 \n" + " pxor %%mm1, %%mm0 \n" + " pxor %%mm3, %%mm2 \n" + " movq %%mm0, 0(%0) \n" + " movq %%mm2, 8(%0) \n" + " add $16, %0\n" + " add $16, %1\n" + " decl %2 \n" + " jne 3b \n" + " emms \n" + + : "+r" (d), "+r" (mt) + : "r" (0) + ); + +} +OIL_DEFINE_IMPL_FULL (mt19937_i386_mmx_3, mt19937, OIL_IMPL_FLAG_MMX); + + diff --git a/liboil/liboilclasses.h b/liboil/liboilclasses.h index 6e4eb3f..9a8f964 100644 --- a/liboil/liboilclasses.h +++ b/liboil/liboilclasses.h @@ -266,6 +266,7 @@ OIL_DECLARE_CLASS(merge_linear_argb); OIL_DECLARE_CLASS(merge_linear_u8); OIL_DECLARE_CLASS(minimum_f32); OIL_DECLARE_CLASS(mix_u8); +OIL_DECLARE_CLASS(mt19937); OIL_DECLARE_CLASS(mult8x8_s16); OIL_DECLARE_CLASS(multiply_f32); OIL_DECLARE_CLASS(multsum_f32); diff --git a/liboil/liboilfuncs-04.h b/liboil/liboilfuncs-04.h index 2b13f52..c0144be 100644 --- a/liboil/liboilfuncs-04.h +++ b/liboil/liboilfuncs-04.h @@ -266,6 +266,7 @@ void oil_merge_linear_argb (uint32_t * d_n, const uint32_t * s_n, const uint32_t void oil_merge_linear_u8 (uint8_t * d_n, const uint8_t * s_n, const uint8_t * s2_n, const uint32_t * s3_1, int n); void oil_minimum_f32 (float * d, const float * s1, const float * s2, int n); void oil_mix_u8 (uint8_t * dest, const uint8_t * src1, const uint8_t * src2, const uint8_t * src3, int n); +void oil_mt19937 (uint32_t * d_624, uint32_t * i_624); void oil_mult8x8_s16 (int16_t * d_8x8, const int16_t * s1_8x8, const int16_t * s2_8x8, int ds, int ss1, int ss2); void oil_multiply_f32 (float * d, const float * s1, const float * s2, int n); void oil_multsum_f32 (float * dest, const float * src1, int sstr1, const float * src2, int sstr2, int n); diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h index ef110dd..f8a6216 100644 --- a/liboil/liboilfuncs.h +++ b/liboil/liboilfuncs.h @@ -726,6 +726,9 @@ typedef void (*_oil_type_minimum_f32)(float * d, const float * s1, const float * extern OilFunctionClass *oil_function_class_ptr_mix_u8; typedef void (*_oil_type_mix_u8)(uint8_t * dest, const uint8_t * src1, const uint8_t * src2, const uint8_t * src3, int n); #define oil_mix_u8 ((_oil_type_mix_u8)(*(void **)oil_function_class_ptr_mix_u8)) +extern OilFunctionClass *oil_function_class_ptr_mt19937; +typedef void (*_oil_type_mt19937)(uint32_t * d_624, uint32_t * i_624); +#define oil_mt19937 ((_oil_type_mt19937)(*(void **)oil_function_class_ptr_mt19937)) extern OilFunctionClass *oil_function_class_ptr_mult8x8_s16; typedef void (*_oil_type_mult8x8_s16)(int16_t * d_8x8, const int16_t * s1_8x8, const int16_t * s2_8x8, int ds, int ss1, int ss2); #define oil_mult8x8_s16 ((_oil_type_mult8x8_s16)(*(void **)oil_function_class_ptr_mult8x8_s16)) diff --git a/liboil/liboilfunction.h b/liboil/liboilfunction.h index 106c1d4..3ca3757 100644 --- a/liboil/liboilfunction.h +++ b/liboil/liboilfunction.h @@ -148,29 +148,47 @@ struct _OilFunctionImpl { /** * OilImplFlag: * - * Implementation flags independent of CPU type. + * Implementation flags. + * + * @OIL_IMPL_FLAG_REF: is the reference implementation for the class. + * + * @OIL_IMPL_FLAG_OPT: was compiled with alternate CFLAGS as specified + * by --enable-alternate-optimization. + * + * @OIL_IMPL_FLAG_ASM: is written in assembly code. + * + * @OIL_IMPL_FLAG_DISABLED: is disabled. This can be set either in the + * source code or during library initialization. + * + * @OIL_IMPL_FLAG_CMOV: uses the i386 instruction cmov or its variants. + * + * @OIL_IMPL_FLAG_MMX: uses MMX instructions. + * + * @OIL_IMPL_FLAG_SSE: uses SSE instructions. + * + * @OIL_IMPL_FLAG_MMXEXT: uses AMD's extended MMX instructions. These + * are a subset of what Intel calls SSE2. If an implementation uses + * only AMD's extended MMX instructions, it should set this flag, and + * not @OIL_IMPL_FLAG_SSE2. + * + * @OIL_IMPL_FLAG_SSE2: uses SSE2 instructions. This flag implies + * @OIL_IMPL_FLAG_SSE and @OIL_IMPL_FLAG_MMXEXT. + * + * @OIL_IMPL_FLAG_3DNOW: uses 3DNow! instructions. + * + * @OIL_IMPL_FLAG_3DNOWEXT: uses extended 3DNow! instructions. + * + * @OIL_IMPL_FLAG_SSE3: uses SSE3 instructions. This flag implies + * @OIL_IMPL_FLAG_SSE2. + * + * @OIL_IMPL_FLAG_ALTIVEC: uses Altivec instructions. + * */ typedef enum { OIL_IMPL_FLAG_REF = (1<<0), OIL_IMPL_FLAG_OPT = (1<<1), OIL_IMPL_FLAG_ASM = (1<<2), - OIL_IMPL_FLAG_DISABLED = (1<<3) -} OilImplFlag; - -/** - * OIL_CPU_FLAG_MASK: - * - * Mask describing which bits in #OilImplFlag depend on the current - * CPU. - */ -#define OIL_CPU_FLAG_MASK 0xffff0000 - -/** - * OilImplFlagI386: - * - * Implementation flags for CPU features on i386. - */ -typedef enum { + OIL_IMPL_FLAG_DISABLED = (1<<3), OIL_IMPL_FLAG_CMOV = (1<<16), OIL_IMPL_FLAG_MMX = (1<<17), OIL_IMPL_FLAG_SSE = (1<<18), @@ -178,17 +196,17 @@ typedef enum { OIL_IMPL_FLAG_SSE2 = (1<<20), OIL_IMPL_FLAG_3DNOW = (1<<21), OIL_IMPL_FLAG_3DNOWEXT = (1<<22), - OIL_IMPL_FLAG_SSE3 = (1<<23) -} OilImplFlagI386; + OIL_IMPL_FLAG_SSE3 = (1<<23), + OIL_IMPL_FLAG_ALTIVEC = (1<<24) +} OilImplFlag; /** - * OilImplFlagPowerPC: + * OIL_CPU_FLAG_MASK: * - * Implementation flags for CPU features on PowerPC. + * Mask describing which bits in #OilImplFlag depend on the current + * CPU. */ -typedef enum { - OIL_IMPL_FLAG_ALTIVEC = (1<<16) -} OilImplFlagPowerPC; +#define OIL_CPU_FLAG_MASK 0xffff0000 /** * OIL_DECLARE_CLASS: diff --git a/liboil/liboilrandom.c b/liboil/liboilrandom.c index eb657a1..79dd6e8 100644 --- a/liboil/liboilrandom.c +++ b/liboil/liboilrandom.c @@ -51,8 +51,11 @@ _oil_random_bits (void *dest, int n) /** * oil_random_s32: + * @dest: + * @n: * - * Evaluates to a random integer in the range [-(1<<31), (1<<31)-1]. + * Writes random values in the range [-(1<<31), (1<<31)-1] to the + * destination array. */ void oil_random_s32(oil_type_s32 *dest, int n) @@ -63,7 +66,8 @@ oil_random_s32(oil_type_s32 *dest, int n) /** * oil_random_s64: * - * Evaluates to a random integer in the range [-(1<<63), (1<<63)-1]. + * Writes random values in the range [-(1<<63), (1<<63)-1] to the + * destination array. */ void oil_random_s64 (oil_type_s64 *dest, int n) @@ -74,7 +78,8 @@ oil_random_s64 (oil_type_s64 *dest, int n) /** * oil_random_s16: * - * Evaluates to a random integer in the range [-(1<<15), (1<<15)-1]. + * Writes random values in the range [-(1<<15), (1<<15)-1] to the + * destination array. */ void oil_random_s16 (oil_type_s16 *dest, int n) @@ -85,7 +90,8 @@ oil_random_s16 (oil_type_s16 *dest, int n) /** * oil_random_s8: * - * Evaluates to a random integer in the range [-(1<<7), (1<<7)-1]. + * Writes random values in the range [-(1<<7), (1<<7)-1] to the + * destination array. */ void oil_random_s8 (oil_type_s8 *dest, int n) @@ -96,7 +102,8 @@ oil_random_s8 (oil_type_s8 *dest, int n) /** * oil_random_u32: * - * Evaluates to a random integer in the range [0, (1<<32)-1]. + * Writes random values in the range [0, (1<<32)-1] to the + * destination array. */ void oil_random_u32 (oil_type_u32 *dest, int n) @@ -107,7 +114,8 @@ oil_random_u32 (oil_type_u32 *dest, int n) /** * oil_random_u64: * - * Evaluates to a random integer in the range [0, (1<<64)-1]. + * Writes random values in the range [0, (1<<64)-1] to the + * destination array. */ void oil_random_u64 (oil_type_u64 *dest, int n) @@ -118,7 +126,8 @@ oil_random_u64 (oil_type_u64 *dest, int n) /** * oil_random_u16: * - * Evaluates to a random integer in the range [0, (1<<16)-1]. + * Writes random values in the range [0, (1<<16)-1] to the + * destination array. */ void oil_random_u16 (oil_type_u16 *dest, int n) @@ -129,7 +138,8 @@ oil_random_u16 (oil_type_u16 *dest, int n) /** * oil_random_u8: * - * Evaluates to a random integer in the range [0, (1<<8)-1]. + * Writes random values in the range [0, (1<<8)-1] to the + * destination array. */ void oil_random_u8 (oil_type_u8 *dest, int n) @@ -140,8 +150,8 @@ oil_random_u8 (oil_type_u8 *dest, int n) /** * oil_random_f64: * - * Evaluates to a random double-precision floating point number - * in the range [0, 1.0). + * Writes random double-precision floating point values in the + * range [0, 1.0) to the destination array. */ void oil_random_f64 (oil_type_f64 *dest, int n) @@ -155,8 +165,8 @@ oil_random_f64 (oil_type_f64 *dest, int n) /** * oil_random_f32: * - * Evaluates to a random single-precision floating point number - * in the range [0, 1.0). + * Writes random single-precision floating point values in the + * range [0, 1.0) to the destination array. */ void oil_random_f32 (oil_type_f32 *dest, int n) @@ -170,7 +180,8 @@ oil_random_f32 (oil_type_f32 *dest, int n) /** * oil_random_alpha: * - * Evaluates a random alpha value. This is similar to oil_random_u8(), + * Writes random values in the range [0, 255] to the destination + * array suitable for alpha values. This is similar to oil_random_u8(), * except the values 0 and 255 are strongly favored. */ void @@ -188,12 +199,11 @@ oil_random_alpha(uint8_t *dest, int n) /** * oil_random_argb: - * @a: an alpha value + * @dest: destination array. + * @n: number of values to write. * - * Creates a valid random RGBA value with the alpha value @a. Valid - * input values for @a are [0,255]. - * - * Evaluates to the result. + * Creates valid random RGBA values and places them in the destination + * array. */ void oil_random_argb(uint32_t *dest, int n) diff --git a/liboil/liboilrandom.h b/liboil/liboilrandom.h index 796e8f5..64394f1 100644 --- a/liboil/liboilrandom.h +++ b/liboil/liboilrandom.h @@ -30,10 +30,6 @@ #include <stdlib.h> -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - #include <liboil/liboiltypes.h> #include <stdlib.h> diff --git a/liboil/liboiltest.c b/liboil/liboiltest.c index f7211a9..5023afa 100644 --- a/liboil/liboiltest.c +++ b/liboil/liboiltest.c @@ -180,7 +180,7 @@ oil_test_set_test_header (OilTest *test, OilParameter *p, int test_header) * oil_test_set_test_footer: * @test: the OilTest * @p: the OilParameter to change the footer for - * @test_header: the number of bytes of guard footer + * @test_footer: the number of bytes of guard footer * * Sets the number of bytes of guard footer for @p to @test_footer. */ diff --git a/liboil/liboiltrampolines.c b/liboil/liboiltrampolines.c index 75c5378..d633c82 100644 --- a/liboil/liboiltrampolines.c +++ b/liboil/liboiltrampolines.c @@ -2331,6 +2331,16 @@ oil_mix_u8 (uint8_t * dest, const uint8_t * src1, const uint8_t * src2, const ui ((void (*)(uint8_t * dest, const uint8_t * src1, const uint8_t * src2, const uint8_t * src3, int n))(_oil_function_class_mix_u8.func))(dest, src1, src2, src3, n); } +#undef oil_mt19937 +void +oil_mt19937 (uint32_t * d_624, uint32_t * i_624) +{ + if (_oil_function_class_mt19937.func == NULL) { + oil_class_optimize (&_oil_function_class_mt19937); + } + ((void (*)(uint32_t * d_624, uint32_t * i_624))(_oil_function_class_mt19937.func))(d_624, i_624); +} + #undef oil_mult8x8_s16 void oil_mult8x8_s16 (int16_t * d_8x8, const int16_t * s1_8x8, const int16_t * s2_8x8, int ds, int ss1, int ss2) diff --git a/liboil/motovec/Makefile.am b/liboil/motovec/Makefile.am index 59665ff..10660a2 100644 --- a/liboil/motovec/Makefile.am +++ b/liboil/motovec/Makefile.am @@ -1,19 +1,10 @@ noinst_LTLIBRARIES = libmotovec.la -c_sources = - -if HAVE_GCC_POWERPC -powerpc_sources = \ +libmotovec_la_SOURCES = \ motovec.c \ vec_memcpy.S \ vec_memset.S -else -powerpc_sources = -endif - -libmotovec_la_SOURCES = \ - $(powerpc_sources) libmotovec_la_LIBADD = libmotovec_la_CFLAGS = $(LIBOIL_CFLAGS) libmotovec_la_CCASFLAGS = $(LIBOIL_CFLAGS) diff --git a/liboil/ref/Makefile.am b/liboil/ref/Makefile.am index 2b565b1..cadb191 100644 --- a/liboil/ref/Makefile.am +++ b/liboil/ref/Makefile.am @@ -20,6 +20,7 @@ c_sources = \ error8x8.c \ math.c \ mix_u8.c \ + mt19937ar.c \ mult8x8_s16.c \ multsum.c \ recon8x8.c \ diff --git a/liboil/ref/mt19937ar.c b/liboil/ref/mt19937ar.c new file mode 100644 index 0000000..ae0ce2d --- /dev/null +++ b/liboil/ref/mt19937ar.c @@ -0,0 +1,93 @@ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <liboil/liboilfunction.h> + +/* Period parameters */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL /* constant vector a */ +#define UPPER_MASK 0x80000000UL /* most significant w-r bits */ +#define LOWER_MASK 0x7fffffffUL /* least significant r bits */ + + +OIL_DEFINE_CLASS(mt19937, "uint32_t *d_624, uint32_t *i_624"); + +/* mag01[x] = x * MATRIX_A for x=0,1 */ +static const uint32_t mag01[2]={0x0UL, MATRIX_A}; + +static void +mt19937_ref (uint32_t *d, uint32_t *mt) +{ + uint32_t y; + int kk; + + for (kk=0;kk<N-M;kk++) { + y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK); + mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + for (;kk<N-1;kk++) { + y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK); + mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK); + mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL]; + + for(kk=0;kk<N;kk++){ + y = mt[kk]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + d[kk] = y; + } +} +OIL_DEFINE_IMPL_REF (mt19937_ref, mt19937); + diff --git a/license_block b/license_block index f0978a0..30b7e5f 100644 --- a/license_block +++ b/license_block @@ -1,6 +1,6 @@ /* * LIBOIL - Library of Optimized Inner Loops - * Copyright (c) 2005 David A. Schleef <ds@schleef.org> + * Copyright (c) 2006 David A. Schleef <ds@schleef.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/m4/as-host-defines.m4 b/m4/as-host-defines.m4 new file mode 100644 index 0000000..ed64d64 --- /dev/null +++ b/m4/as-host-defines.m4 @@ -0,0 +1,33 @@ +dnl as-host-defines.m4 0.1.0 + +dnl autostars m4 macro for generating defines for various host architectures + +dnl David Schleef <ds@schleef.org> + +dnl $Id: as-host-defines.m4,v 1.1 2006-01-31 02:20:03 ds Exp $ + +dnl AS_HOST_DEFINES() + +AC_DEFUN([AS_HOST_DEFINES], +[ + case "x${host_cpu}" in + xi?86 | k?) + HAVE_I386=yes + AC_DEFINE(HAVE_I386, 1, [Defined if host is i386]) + ;; + xx86_64|xamd64) + HAVE_AMD64=yes + AC_DEFINE(HAVE_AMD64, 1, [Defined if host is amd64]) + ;; + xpowerpc|xppc) + HAVE_POWERPC=yes + AC_DEFINE(HAVE_POWERPC, 1, [Defined if host is powerpc]) + ;; + esac + +AM_CONDITIONAL(HAVE_I386, test "x$HAVE_I386" = "xyes") +AM_CONDITIONAL(HAVE_AMD64, test "x$HAVE_AMD64" = "xyes") +AM_CONDITIONAL(HAVE_POWERPC, test "x$HAVE_POWERPC" = "xyes") + +]) + diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am index 9ada1f6..e5dcd45 100644 --- a/testsuite/Makefile.am +++ b/testsuite/Makefile.am @@ -2,7 +2,7 @@ SUBDIRS = instruction programs = align moo introspect proto1 proto2 test1 proto3 proto4 stride \ - dso_check abs md5 md5_profile trans copy zigzag + dso_check abs md5 md5_profile trans copy zigzag mmx_engine check_PROGRAMS = $(programs) noinst_PROGRAMS = list_impls diff --git a/testsuite/instruction/Makefile.am b/testsuite/instruction/Makefile.am index fd7c30c..86651ec 100644 --- a/testsuite/instruction/Makefile.am +++ b/testsuite/instruction/Makefile.am @@ -3,12 +3,6 @@ EXTRA_DIST = check-instructions.pl check_PROGRAMS = list-impls -if HAVE_GCC_I386 -#check_DATA = report -else -#check_DATA = -endif - AM_LDFLAGS = $(LIBOIL_LIBS) AM_CFLAGS = $(LIBOIL_CFLAGS) diff --git a/testsuite/mmx_engine.c b/testsuite/mmx_engine.c new file mode 100644 index 0000000..fa7e7c4 --- /dev/null +++ b/testsuite/mmx_engine.c @@ -0,0 +1,221 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2006 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <liboil/liboil.h> + +#include <liboil/liboilprototype.h> +#include <liboil/liboiltest.h> +#include <liboil/liboilcpu.h> +#include <liboil/liboilrandom.h> + +void mmx_engine_test(void); + +int main (int argc, char *argv[]) +{ + uint32_t cpu_flags; + + oil_init (); + + cpu_flags = oil_cpu_get_flags (); + if (!(cpu_flags & OIL_IMPL_FLAG_MMX)) { + printf("No MMX.\n"); + exit(0); + } + +#ifdef HAVE_GCC_ASM +#if defined(HAVE_I386) || defined(HAVE_AMD64) + mmx_engine_test(); +#endif +#endif + + return 0; +} + +#ifdef HAVE_GCC_ASM +#if defined(HAVE_I386) || defined(HAVE_AMD64) +void mmx_engine_test(void) +{ + OilProfile prof; + double ave, std; + int i; + +#define CHECK_LATENCY(insn) \ + oil_profile_init (&prof); \ + for(i=0;i<10;i++) { \ + oil_profile_start(&prof); \ + asm volatile ( \ + " mov $1000, %%ecx\n" \ + ".align 16\n" \ + "1:\n" \ + " " #insn " %%mm0, %%mm1\n" \ + " " #insn " %%mm1, %%mm2\n" \ + " " #insn " %%mm2, %%mm3\n" \ + " " #insn " %%mm3, %%mm0\n" \ + " decl %%ecx\n" \ + " jne 1b\n" \ + " emms\n" \ + :::"ecx"); \ + oil_profile_stop(&prof); \ + } \ + oil_profile_get_ave_std(&prof, &ave, &std); \ + ave -= 40; \ + printf("latency of " #insn ": %g +/- %g\n", ave/4000, std/4000); \ + + CHECK_LATENCY(packssdw) + CHECK_LATENCY(packsswb) + CHECK_LATENCY(packuswb) + CHECK_LATENCY(paddb) + CHECK_LATENCY(paddd) + CHECK_LATENCY(paddsb) + CHECK_LATENCY(paddsw) + CHECK_LATENCY(paddusb) + CHECK_LATENCY(paddusw) + CHECK_LATENCY(paddw) + CHECK_LATENCY(pand) + CHECK_LATENCY(pandn) + CHECK_LATENCY(pcmpeqb) + CHECK_LATENCY(pcmpeqd) + CHECK_LATENCY(pcmpeqw) + CHECK_LATENCY(pcmpgtb) + CHECK_LATENCY(pcmpgtd) + CHECK_LATENCY(pcmpgtw) + CHECK_LATENCY(pmaddwd) + CHECK_LATENCY(pmulhw) + CHECK_LATENCY(pmullw) + CHECK_LATENCY(pmulhuw) + CHECK_LATENCY(por) + CHECK_LATENCY(pslld) + CHECK_LATENCY(psllq) + CHECK_LATENCY(psllw) + CHECK_LATENCY(psrad) + CHECK_LATENCY(psraw) + CHECK_LATENCY(psrld) + CHECK_LATENCY(psrlq) + CHECK_LATENCY(psrlw) + CHECK_LATENCY(psubb) + CHECK_LATENCY(psubd) + CHECK_LATENCY(psubsb) + CHECK_LATENCY(psubsw) + CHECK_LATENCY(psubusb) + CHECK_LATENCY(psubusw) + CHECK_LATENCY(psubw) + CHECK_LATENCY(punpckhbw) + CHECK_LATENCY(punpckhdq) + CHECK_LATENCY(punpckhwd) + CHECK_LATENCY(punpcklbw) + CHECK_LATENCY(punpckldq) + CHECK_LATENCY(punpcklwd) + CHECK_LATENCY(pxor) + +#define CHECK_THROUGHPUT(insn) \ + oil_profile_init (&prof); \ + for(i=0;i<10;i++) { \ + oil_profile_start(&prof); \ + asm volatile ( \ + " mov $1000, %%ecx\n" \ + ".align 16\n" \ + "1:\n" \ + " " #insn " %%mm0, %%mm1\n" \ + " " #insn " %%mm2, %%mm3\n" \ + " " #insn " %%mm4, %%mm5\n" \ + " " #insn " %%mm6, %%mm7\n" \ + " " #insn " %%mm0, %%mm1\n" \ + " " #insn " %%mm2, %%mm3\n" \ + " " #insn " %%mm4, %%mm5\n" \ + " " #insn " %%mm6, %%mm7\n" \ + " " #insn " %%mm0, %%mm1\n" \ + " " #insn " %%mm2, %%mm3\n" \ + " " #insn " %%mm4, %%mm5\n" \ + " " #insn " %%mm6, %%mm7\n" \ + " " #insn " %%mm0, %%mm1\n" \ + " " #insn " %%mm2, %%mm3\n" \ + " " #insn " %%mm4, %%mm5\n" \ + " " #insn " %%mm6, %%mm7\n" \ + " decl %%ecx\n" \ + " jne 1b\n" \ + " emms\n" \ + :::"ecx"); \ + oil_profile_stop(&prof); \ + } \ + oil_profile_get_ave_std(&prof, &ave, &std); \ + ave -= 40; \ + printf("throughput of " #insn ": %g +/- %g\n", ave/16000, std/16000); \ + + CHECK_THROUGHPUT(packssdw) + CHECK_THROUGHPUT(packsswb) + CHECK_THROUGHPUT(packuswb) + CHECK_THROUGHPUT(paddb) + CHECK_THROUGHPUT(paddd) + CHECK_THROUGHPUT(paddsb) + CHECK_THROUGHPUT(paddsw) + CHECK_THROUGHPUT(paddusb) + CHECK_THROUGHPUT(paddusw) + CHECK_THROUGHPUT(paddw) + CHECK_THROUGHPUT(pand) + CHECK_THROUGHPUT(pandn) + CHECK_THROUGHPUT(pcmpeqb) + CHECK_THROUGHPUT(pcmpeqd) + CHECK_THROUGHPUT(pcmpeqw) + CHECK_THROUGHPUT(pcmpgtb) + CHECK_THROUGHPUT(pcmpgtd) + CHECK_THROUGHPUT(pcmpgtw) + CHECK_THROUGHPUT(pmaddwd) + CHECK_THROUGHPUT(pmulhw) + CHECK_THROUGHPUT(pmullw) + CHECK_THROUGHPUT(pmulhuw) + CHECK_THROUGHPUT(por) + CHECK_THROUGHPUT(pslld) + CHECK_THROUGHPUT(psllq) + CHECK_THROUGHPUT(psllw) + CHECK_THROUGHPUT(psrad) + CHECK_THROUGHPUT(psraw) + CHECK_THROUGHPUT(psrld) + CHECK_THROUGHPUT(psrlq) + CHECK_THROUGHPUT(psrlw) + CHECK_THROUGHPUT(psubb) + CHECK_THROUGHPUT(psubd) + CHECK_THROUGHPUT(psubsb) + CHECK_THROUGHPUT(psubsw) + CHECK_THROUGHPUT(psubusb) + CHECK_THROUGHPUT(psubusw) + CHECK_THROUGHPUT(psubw) + CHECK_THROUGHPUT(punpckhbw) + CHECK_THROUGHPUT(punpckhdq) + CHECK_THROUGHPUT(punpckhwd) + CHECK_THROUGHPUT(punpcklbw) + CHECK_THROUGHPUT(punpckldq) + CHECK_THROUGHPUT(punpcklwd) + CHECK_THROUGHPUT(pxor) +} +#endif +#endif + |