diff options
author | lvqcl <lvqcl.mail@gmail.com> | 2018-09-19 20:03:37 +0300 |
---|---|---|
committer | Erik de Castro Lopo <erikd@mega-nerd.com> | 2018-09-20 07:20:13 +1000 |
commit | 421961f00b505dcad41603305b47eda0b2ddfe92 (patch) | |
tree | 051990fb5c29ee092201be9562fa6479834a25f3 /src/libFLAC/lpc_intrin_sse41.c | |
parent | faafa4c82c31e5aed7bc7c0e87a379825372c6ac (diff) | |
download | flac-421961f00b505dcad41603305b47eda0b2ddfe92.tar.gz |
Replace hadd with shuffle + add
Diffstat (limited to 'src/libFLAC/lpc_intrin_sse41.c')
-rw-r--r-- | src/libFLAC/lpc_intrin_sse41.c | 16 |
1 file changed, 8 insertions, 8 deletions
```diff
diff --git a/src/libFLAC/lpc_intrin_sse41.c b/src/libFLAC/lpc_intrin_sse41.c
index 96dd20de..4ef3d3e4 100644
--- a/src/libFLAC/lpc_intrin_sse41.c
+++ b/src/libFLAC/lpc_intrin_sse41.c
@@ -980,8 +980,8 @@ void FLAC__lpc_restore_signal_intrin_sse41(const FLAC__int32 residual[], uint32_
 summ = _mm_add_epi32(summ, _mm_mullo_epi32(dat[1], qlp[1]));
 summ = _mm_add_epi32(summ, _mm_mullo_epi32(dat[0], qlp[0]));
-summ = _mm_hadd_epi32(summ, summ);
-summ = _mm_hadd_epi32(summ, summ);
+summ = _mm_add_epi32(summ, _mm_shuffle_epi32(summ, _MM_SHUFFLE(1,0,3,2)));
+summ = _mm_add_epi32(summ, _mm_shufflelo_epi16(summ, _MM_SHUFFLE(1,0,3,2)));
 summ = _mm_sra_epi32(summ, cnt);
 temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
@@ -1009,8 +1009,8 @@ void FLAC__lpc_restore_signal_intrin_sse41(const FLAC__int32 residual[], uint32_
 for (i = 0;;) {
 summ = _mm_add_epi32(_mm_mullo_epi32(dat[1], qlp[1]), _mm_mullo_epi32(dat[0], qlp[0]));
-summ = _mm_hadd_epi32(summ, summ);
-summ = _mm_hadd_epi32(summ, summ);
+summ = _mm_add_epi32(summ, _mm_shuffle_epi32(summ, _MM_SHUFFLE(1,0,3,2)));
+summ = _mm_add_epi32(summ, _mm_shufflelo_epi16(summ, _MM_SHUFFLE(1,0,3,2)));
 summ = _mm_sra_epi32(summ, cnt);
 temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
@@ -1079,8 +1079,8 @@ void FLAC__lpc_restore_signal_16_intrin_sse41(const FLAC__int32 residual[], uint
 summ = _mm_madd_epi16(dat[1], qlp[1]);
 summ = _mm_add_epi32(summ, _mm_madd_epi16(dat[0], qlp[0]));
-summ = _mm_hadd_epi32(summ, summ);
-summ = _mm_hadd_epi32(summ, summ);
+summ = _mm_add_epi32(summ, _mm_shuffle_epi32(summ, _MM_SHUFFLE(1,0,3,2)));
+summ = _mm_add_epi32(summ, _mm_shufflelo_epi16(summ, _MM_SHUFFLE(1,0,3,2)));
 summ = _mm_sra_epi32(summ, cnt);
 temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
@@ -1109,8 +1109,8 @@ void FLAC__lpc_restore_signal_16_intrin_sse41(const FLAC__int32 residual[], uint
 for(i = 0;;) {
 summ = _mm_madd_epi16(dat0, qlp0);
-summ = _mm_hadd_epi32(summ, summ);
-summ = _mm_hadd_epi32(summ, summ);
+summ = _mm_add_epi32(summ, _mm_shuffle_epi32(summ, _MM_SHUFFLE(1,0,3,2)));
+summ = _mm_add_epi32(summ, _mm_shufflelo_epi16(summ, _MM_SHUFFLE(1,0,3,2)));
 summ = _mm_sra_epi32(summ, cnt);
 temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
```