diff options
author | Erik de Castro Lopo <erikd@mega-nerd.com> | 2014-09-21 09:28:36 +1000 |
---|---|---|
committer | Erik de Castro Lopo <erikd@mega-nerd.com> | 2014-09-21 09:34:32 +1000 |
commit | 71875b0c75d5ff686f3a9bf3adc268d6442eeabb (patch) | |
tree | 383f15a500cb9f48c10166cdd77d20a86a37fca0 /src/libFLAC/fixed_intrin_ssse3.c | |
parent | 6abc4803872bfc5fbe7a14b9e7e914cdf0fea560 (diff) | |
download | flac-71875b0c75d5ff686f3a9bf3adc268d6442eeabb.tar.gz |
fixed_intrin_sse[23].c : Add new, simpler SSE code.
It's simpler but not faster, so it is disabled by default. Maybe it
will be faster on newer CPUs, though.
Patch-from: lvqcl <lvqcl.mail@gmail.com>
Diffstat (limited to 'src/libFLAC/fixed_intrin_ssse3.c')
-rw-r--r-- | src/libFLAC/fixed_intrin_ssse3.c | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/src/libFLAC/fixed_intrin_ssse3.c b/src/libFLAC/fixed_intrin_ssse3.c
index e444c71a..50c663d8 100644
--- a/src/libFLAC/fixed_intrin_ssse3.c
+++ b/src/libFLAC/fixed_intrin_ssse3.c
@@ -80,6 +80,7 @@ unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[
 		__m128i err0, err1;
 		err0 = _mm_cvtsi32_si128(data[i]); // 0 0 0 e0
 		err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0)); // e0 e0 e0 e0
+#if 1 /* OPT_SSE */
 		err1 = _mm_sub_epi32(err1, last_error);
 		last_error = _mm_srli_si128(last_error, 4); // 0 le0 le1 le2
 		err1 = _mm_sub_epi32(err1, last_error);
@@ -87,7 +88,11 @@ unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[
 		err1 = _mm_sub_epi32(err1, last_error);
 		last_error = _mm_srli_si128(last_error, 4); // 0 0 0 le0
 		err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4
-
+#else
+		last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 8)); // le0 le1 le2+le0 le3+le1
+		last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 4)); // le0 le1+le0 le2+le0+le1 le3+le1+le2+le0
+		err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4
+#endif
 		last_error = _mm_alignr_epi8(err0, err1, 4); // e0 e1 e2 e3
 
 		err0 = _mm_abs_epi32(err0);
@@ -166,6 +171,7 @@ unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32
 		__m128i err0, err1;
 		err0 = _mm_cvtsi32_si128(data[i]); // 0 0 0 e0
 		err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0)); // e0 e0 e0 e0
+#if 1 /* OPT_SSE */
 		err1 = _mm_sub_epi32(err1, last_error);
 		last_error = _mm_srli_si128(last_error, 4); // 0 le0 le1 le2
 		err1 = _mm_sub_epi32(err1, last_error);
@@ -173,7 +179,11 @@ unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32
 		err1 = _mm_sub_epi32(err1, last_error);
 		last_error = _mm_srli_si128(last_error, 4); // 0 0 0 le0
 		err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4
-
+#else
+		last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 8)); // le0 le1 le2+le0 le3+le1
+		last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 4)); // le0 le1+le0 le2+le0+le1 le3+le1+le2+le0
+		err1 = _mm_sub_epi32(err1, last_error); // e1 e2 e3 e4
+#endif
 		last_error = _mm_alignr_epi8(err0, err1, 4); // e0 e1 e2 e3
 
 		err0 = _mm_abs_epi32(err0);