diff options
author | David Schleef <ds@schleef.org> | 2006-05-19 06:56:08 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2006-05-19 06:56:08 +0000 |
commit | b917cd3ece2311a9542a0ea671be4aa74b720801 (patch) | |
tree | 4f0cd494b3e2f00ad76f69719e4771270175916d /liboil/simdpack | |
parent | 7a68b641e8cc685f89b0cb95e571f95ea137d8cd (diff) | |
download | liboil-b917cd3ece2311a9542a0ea671be4aa74b720801.tar.gz |
* liboil/liboilcpu.c: (oil_cpu_detect_arch): Oops, fix compile bug
from last checkin.
* liboil/simdpack/multsum.c: (multsum_f64_unroll8):
* liboil/sse/multsum_sse.c: New file.
* liboil/sse/Makefile.am:
Patch from Marcus Brubaker adding some multsum_f64 impls.
(Fixes #6957)
Diffstat (limited to 'liboil/simdpack')
-rw-r--r-- | liboil/simdpack/multsum.c | 44 |
1 files changed, 44 insertions, 0 deletions
```diff
diff --git a/liboil/simdpack/multsum.c b/liboil/simdpack/multsum.c
index 5c2c547..2c8a00f 100644
--- a/liboil/simdpack/multsum.c
+++ b/liboil/simdpack/multsum.c
@@ -55,3 +55,47 @@ static void multsum_f32_unroll2 (float *dest, float *src1, int sstr1,
 }
 OIL_DEFINE_IMPL (multsum_f32_unroll2, multsum_f32);
 
+
+static void multsum_f64_unroll8 (double *dest, double *src1, int sstr1,
+    double *src2, int sstr2, int n)
+{
+  int i = 0;
+  double sum = 0;
+
+  while(i<n-7) {
+    sum += (OIL_GET(src1,0, double) * OIL_GET(src2,0, double)) +
+      (OIL_GET(src1,sstr1, double) * OIL_GET(src2,sstr2, double)) +
+      (OIL_GET(src1,2*sstr1, double) * OIL_GET(src2,2*sstr2, double)) +
+      (OIL_GET(src1,3*sstr1, double) * OIL_GET(src2,3*sstr2, double)) +
+      (OIL_GET(src1,4*sstr1, double) * OIL_GET(src2,4*sstr2, double)) +
+      (OIL_GET(src1,5*sstr1, double) * OIL_GET(src2,5*sstr2, double)) +
+      (OIL_GET(src1,6*sstr1, double) * OIL_GET(src2,6*sstr2, double)) +
+      (OIL_GET(src1,7*sstr1, double) * OIL_GET(src2,7*sstr2, double));
+    OIL_INCREMENT (src1, sstr1*8);
+    OIL_INCREMENT (src2, sstr2*8);
+    i+=8;
+  }
+  while(i<n-3) {
+    sum += (OIL_GET(src1,0, double) * OIL_GET(src2,0, double)) +
+      (OIL_GET(src1,sstr1, double) * OIL_GET(src2,sstr2, double)) +
+      (OIL_GET(src1,2*sstr1, double) * OIL_GET(src2,2*sstr2, double)) +
+      (OIL_GET(src1,3*sstr1, double) * OIL_GET(src2,3*sstr2, double));
+    OIL_INCREMENT (src1, sstr1*4);
+    OIL_INCREMENT (src2, sstr2*4);
+    i+=4;
+  }
+  while(i<n-1) {
+    sum += (OIL_GET(src1,0, double) * OIL_GET(src2,0, double)) +
+      (OIL_GET(src1,sstr1, double) * OIL_GET(src2,sstr2, double));
+    OIL_INCREMENT (src1, sstr1*2);
+    OIL_INCREMENT (src2, sstr2*2);
+    i+=2;
+  }
+  if (i<n) {
+    sum += OIL_GET(src1,0, double) * OIL_GET(src2,0, double);
+  }
+
+  *dest = sum;
+}
+OIL_DEFINE_IMPL (multsum_f64_unroll8, multsum_f64);
+
```