author     Martijn van Beurden <mvanb1@gmail.com>    2022-05-12 14:28:05 +0200
committer  Martijn van Beurden <mvanb1@gmail.com>    2022-05-26 11:04:05 +0200
commit     febff86af03d2ccbdd4826c9555336916eb6ecb3 (patch)
tree       85a57ae946303c1ebc8e8cb025da46b07ab25503 /src
parent     a67102694d07ef7a43f02598ce943f304e0f8839 (diff)
Remove all assembler and intrinsics from decoder
This commit drops all use of assembler and intrinsics from the libFLAC decoder: these routines exist only for 32-bit x86, they are hard to debug, maintain, and fuzz properly, and the decoder poses much greater security risks than the encoder.
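
For reference (not part of this patch): after this change the decoder calls the portable C routines unconditionally. Below is a minimal standalone sketch of the narrow path, transcribed from the C reference kept as comments in the deleted assembly further down; the in-tree lpc.c version may differ in detail, and the typedef is a stand-in for FLAC/ordinals.h:

    #include <assert.h>
    #include <stdint.h>

    typedef int32_t FLAC__int32; /* stand-in for FLAC/ordinals.h */

    /* Indices are kept signed so the negative warmup offsets data[i-j-1]
     * (which reach before the start of data[]) stay well-defined. */
    void FLAC__lpc_restore_signal(const FLAC__int32 residual[], uint32_t data_len,
                                  const FLAC__int32 qlp_coeff[], uint32_t order,
                                  int lp_quantization, FLAC__int32 data[])
    {
        int i, j;
        FLAC__int32 sum;

        assert(order > 0);

        for(i = 0; i < (int)data_len; i++) {
            sum = 0;
            for(j = 0; j < (int)order; j++)
                sum += qlp_coeff[j] * data[i - j - 1];
            data[i] = residual[i] + (sum >> lp_quantization);
        }
    }

The wide variant is identical except that sum is a FLAC__int64, so products are accumulated in 64 bits before the shift (see the reference comment ahead of FLAC__lpc_restore_signal_wide_asm_ia32 below).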
Diffstat (limited to 'src')
-rw-r--r--  src/libFLAC/ia32/lpc_asm.nasm      | 655
-rw-r--r--  src/libFLAC/include/private/lpc.h  |  16
-rw-r--r--  src/libFLAC/lpc_intrin_sse41.c     | 544
-rw-r--r--  src/libFLAC/stream_decoder.c       |  57
4 files changed, 2 insertions(+), 1270 deletions(-)
diff --git a/src/libFLAC/ia32/lpc_asm.nasm b/src/libFLAC/ia32/lpc_asm.nasm
index b6117605..af5fc309 100644
--- a/src/libFLAC/ia32/lpc_asm.nasm
+++ b/src/libFLAC/ia32/lpc_asm.nasm
@@ -38,9 +38,6 @@
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
-cglobal FLAC__lpc_restore_signal_asm_ia32
-cglobal FLAC__lpc_restore_signal_asm_ia32_mmx
-cglobal FLAC__lpc_restore_signal_wide_asm_ia32
code_section
@@ -448,377 +445,6 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
; **********************************************************************
;
-; void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
-; {
-; unsigned i, j;
-; FLAC__int32 sum;
-;
-; FLAC__ASSERT(order > 0);
-;
-; for(i = 0; i < data_len; i++) {
-; sum = 0;
-; for(j = 0; j < order; j++)
-; sum += qlp_coeff[j] * data[i-j-1];
-; data[i] = residual[i] + (sum >> lp_quantization);
-; }
-; }
- ALIGN 16
-cident FLAC__lpc_restore_signal_asm_ia32
- ;[esp + 40] data[]
- ;[esp + 36] lp_quantization
- ;[esp + 32] order
- ;[esp + 28] qlp_coeff[]
- ;[esp + 24] data_len
- ;[esp + 20] residual[]
-
- ;ASSERT(order > 0)
-
- push ebp
- push ebx
- push esi
- push edi
-
- mov esi, [esp + 20] ; esi = residual[]
- mov edi, [esp + 40] ; edi = data[]
- mov eax, [esp + 32] ; eax = order
- mov ebx, [esp + 24] ; ebx = data_len
-
- test ebx, ebx
- jz near .end ; do nothing if data_len == 0
-
-.begin:
- cmp eax, byte 1
- jg short .x87_1more
-
- mov ecx, [esp + 28]
- mov edx, [ecx]
- mov eax, [edi - 4]
- mov ecx, [esp + 36]
- ALIGN 16
-.x87_1_loop_i:
- imul eax, edx
- sar eax, cl
- add eax, [esi]
- mov [edi], eax
- add esi, byte 4
- add edi, byte 4
- dec ebx
- jnz .x87_1_loop_i
-
- jmp .end
-
-.x87_1more:
- cmp eax, byte 32 ; for order <= 32 there is a faster routine
- jbe short .x87_32
-
- ; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
- ALIGN 16
-.x87_32more_loop_i:
- xor ebp, ebp
- mov ecx, [esp + 32]
- mov edx, ecx
- shl edx, 2
- add edx, [esp + 28]
- neg ecx
- ALIGN 16
-.x87_32more_loop_j:
- sub edx, byte 4
- mov eax, [edx]
- imul eax, [edi + 4 * ecx]
- add ebp, eax
- inc ecx
- jnz short .x87_32more_loop_j
-
- mov ecx, [esp + 36]
- sar ebp, cl
- add ebp, [esi]
- mov [edi], ebp
- add edi, byte 4
- add esi, byte 4
-
- dec ebx
- jnz .x87_32more_loop_i
-
- jmp .end
-
-.mov_eip_to_eax:
- mov eax, [esp]
- ret
-
-.x87_32:
- sub esi, edi
- neg eax
- lea edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
- call .mov_eip_to_eax
-.get_eip0:
- add edx, eax
- inc edx ; compensate for the shorter opcode on the last iteration
- mov eax, [esp + 28] ; eax = qlp_coeff[]
- xor ebp, ebp
- jmp edx
-
- mov ecx, [eax + 124] ; ecx = qlp_coeff[31]
- imul ecx, [edi - 128] ; ecx = qlp_coeff[31] * data[i-32]
- add ebp, ecx ; sum += qlp_coeff[31] * data[i-32]
- mov ecx, [eax + 120] ; ecx = qlp_coeff[30]
- imul ecx, [edi - 124] ; ecx = qlp_coeff[30] * data[i-31]
- add ebp, ecx ; sum += qlp_coeff[30] * data[i-31]
- mov ecx, [eax + 116] ; ecx = qlp_coeff[29]
- imul ecx, [edi - 120] ; ecx = qlp_coeff[29] * data[i-30]
- add ebp, ecx ; sum += qlp_coeff[29] * data[i-30]
- mov ecx, [eax + 112] ; ecx = qlp_coeff[28]
- imul ecx, [edi - 116] ; ecx = qlp_coeff[28] * data[i-29]
- add ebp, ecx ; sum += qlp_coeff[28] * data[i-29]
- mov ecx, [eax + 108] ; ecx = qlp_coeff[27]
- imul ecx, [edi - 112] ; ecx = qlp_coeff[27] * data[i-28]
- add ebp, ecx ; sum += qlp_coeff[27] * data[i-28]
- mov ecx, [eax + 104] ; ecx = qlp_coeff[26]
- imul ecx, [edi - 108] ; ecx = qlp_coeff[26] * data[i-27]
- add ebp, ecx ; sum += qlp_coeff[26] * data[i-27]
- mov ecx, [eax + 100] ; ecx = qlp_coeff[25]
- imul ecx, [edi - 104] ; ecx = qlp_coeff[25] * data[i-26]
- add ebp, ecx ; sum += qlp_coeff[25] * data[i-26]
- mov ecx, [eax + 96] ; ecx = qlp_coeff[24]
- imul ecx, [edi - 100] ; ecx = qlp_coeff[24] * data[i-25]
- add ebp, ecx ; sum += qlp_coeff[24] * data[i-25]
- mov ecx, [eax + 92] ; ecx = qlp_coeff[23]
- imul ecx, [edi - 96] ; ecx = qlp_coeff[23] * data[i-24]
- add ebp, ecx ; sum += qlp_coeff[23] * data[i-24]
- mov ecx, [eax + 88] ; ecx = qlp_coeff[22]
- imul ecx, [edi - 92] ; ecx = qlp_coeff[22] * data[i-23]
- add ebp, ecx ; sum += qlp_coeff[22] * data[i-23]
- mov ecx, [eax + 84] ; ecx = qlp_coeff[21]
- imul ecx, [edi - 88] ; ecx = qlp_coeff[21] * data[i-22]
- add ebp, ecx ; sum += qlp_coeff[21] * data[i-22]
- mov ecx, [eax + 80] ; ecx = qlp_coeff[20]
- imul ecx, [edi - 84] ; ecx = qlp_coeff[20] * data[i-21]
- add ebp, ecx ; sum += qlp_coeff[20] * data[i-21]
- mov ecx, [eax + 76] ; ecx = qlp_coeff[19]
- imul ecx, [edi - 80] ; ecx = qlp_coeff[19] * data[i-20]
- add ebp, ecx ; sum += qlp_coeff[19] * data[i-20]
- mov ecx, [eax + 72] ; ecx = qlp_coeff[18]
- imul ecx, [edi - 76] ; ecx = qlp_coeff[18] * data[i-19]
- add ebp, ecx ; sum += qlp_coeff[18] * data[i-19]
- mov ecx, [eax + 68] ; ecx = qlp_coeff[17]
- imul ecx, [edi - 72] ; ecx = qlp_coeff[17] * data[i-18]
- add ebp, ecx ; sum += qlp_coeff[17] * data[i-18]
- mov ecx, [eax + 64] ; ecx = qlp_coeff[16]
- imul ecx, [edi - 68] ; ecx = qlp_coeff[16] * data[i-17]
- add ebp, ecx ; sum += qlp_coeff[16] * data[i-17]
- mov ecx, [eax + 60] ; ecx = qlp_coeff[15]
- imul ecx, [edi - 64] ; ecx = qlp_coeff[15] * data[i-16]
- add ebp, ecx ; sum += qlp_coeff[15] * data[i-16]
- mov ecx, [eax + 56] ; ecx = qlp_coeff[14]
- imul ecx, [edi - 60] ; ecx = qlp_coeff[14] * data[i-15]
- add ebp, ecx ; sum += qlp_coeff[14] * data[i-15]
- mov ecx, [eax + 52] ; ecx = qlp_coeff[13]
- imul ecx, [edi - 56] ; ecx = qlp_coeff[13] * data[i-14]
- add ebp, ecx ; sum += qlp_coeff[13] * data[i-14]
- mov ecx, [eax + 48] ; ecx = qlp_coeff[12]
- imul ecx, [edi - 52] ; ecx = qlp_coeff[12] * data[i-13]
- add ebp, ecx ; sum += qlp_coeff[12] * data[i-13]
- mov ecx, [eax + 44] ; ecx = qlp_coeff[11]
- imul ecx, [edi - 48] ; ecx = qlp_coeff[11] * data[i-12]
- add ebp, ecx ; sum += qlp_coeff[11] * data[i-12]
- mov ecx, [eax + 40] ; ecx = qlp_coeff[10]
- imul ecx, [edi - 44] ; ecx = qlp_coeff[10] * data[i-11]
- add ebp, ecx ; sum += qlp_coeff[10] * data[i-11]
- mov ecx, [eax + 36] ; ecx = qlp_coeff[ 9]
- imul ecx, [edi - 40] ; ecx = qlp_coeff[ 9] * data[i-10]
- add ebp, ecx ; sum += qlp_coeff[ 9] * data[i-10]
- mov ecx, [eax + 32] ; ecx = qlp_coeff[ 8]
- imul ecx, [edi - 36] ; ecx = qlp_coeff[ 8] * data[i- 9]
- add ebp, ecx ; sum += qlp_coeff[ 8] * data[i- 9]
- mov ecx, [eax + 28] ; ecx = qlp_coeff[ 7]
- imul ecx, [edi - 32] ; ecx = qlp_coeff[ 7] * data[i- 8]
- add ebp, ecx ; sum += qlp_coeff[ 7] * data[i- 8]
- mov ecx, [eax + 24] ; ecx = qlp_coeff[ 6]
- imul ecx, [edi - 28] ; ecx = qlp_coeff[ 6] * data[i- 7]
- add ebp, ecx ; sum += qlp_coeff[ 6] * data[i- 7]
- mov ecx, [eax + 20] ; ecx = qlp_coeff[ 5]
- imul ecx, [edi - 24] ; ecx = qlp_coeff[ 5] * data[i- 6]
- add ebp, ecx ; sum += qlp_coeff[ 5] * data[i- 6]
- mov ecx, [eax + 16] ; ecx = qlp_coeff[ 4]
- imul ecx, [edi - 20] ; ecx = qlp_coeff[ 4] * data[i- 5]
- add ebp, ecx ; sum += qlp_coeff[ 4] * data[i- 5]
- mov ecx, [eax + 12] ; ecx = qlp_coeff[ 3]
- imul ecx, [edi - 16] ; ecx = qlp_coeff[ 3] * data[i- 4]
- add ebp, ecx ; sum += qlp_coeff[ 3] * data[i- 4]
- mov ecx, [eax + 8] ; ecx = qlp_coeff[ 2]
- imul ecx, [edi - 12] ; ecx = qlp_coeff[ 2] * data[i- 3]
- add ebp, ecx ; sum += qlp_coeff[ 2] * data[i- 3]
- mov ecx, [eax + 4] ; ecx = qlp_coeff[ 1]
- imul ecx, [edi - 8] ; ecx = qlp_coeff[ 1] * data[i- 2]
- add ebp, ecx ; sum += qlp_coeff[ 1] * data[i- 2]
- mov ecx, [eax] ; ecx = qlp_coeff[ 0] (NOTE: one byte missing from instruction)
- imul ecx, [edi - 4] ; ecx = qlp_coeff[ 0] * data[i- 1]
- add ebp, ecx ; sum += qlp_coeff[ 0] * data[i- 1]
-.jumper_0:
-
- mov ecx, [esp + 36]
- sar ebp, cl ; ebp = (sum >> lp_quantization)
- add ebp, [esi + edi] ; ebp = residual[i] + (sum >> lp_quantization)
- mov [edi], ebp ; data[i] = residual[i] + (sum >> lp_quantization)
- add edi, byte 4
-
- dec ebx
- jz short .end
- xor ebp, ebp
- jmp edx
-
-.end:
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
-; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
-; the channel and qlp_coeffs must be <= 16. Especially note that this routine
-; cannot be used for side-channel coded 16bps channels since the effective bps
-; is 17.
-; WATCHOUT: this routine requires that each data array have a buffer of up to
-; 3 zeroes in front (at negative indices) for alignment purposes, i.e. for each
-; channel n, data[n][-1] through data[n][-3] should be accessible and zero.
- ALIGN 16
-cident FLAC__lpc_restore_signal_asm_ia32_mmx
- ;[esp + 40] data[]
- ;[esp + 36] lp_quantization
- ;[esp + 32] order
- ;[esp + 28] qlp_coeff[]
- ;[esp + 24] data_len
- ;[esp + 20] residual[]
-
- ;ASSERT(order > 0)
-
- push ebp
- push ebx
- push esi
- push edi
-
- mov esi, [esp + 20]
- mov edi, [esp + 40]
- mov eax, [esp + 32]
- mov ebx, [esp + 24]
-
- test ebx, ebx
- jz near .end ; do nothing if data_len == 0
- cmp eax, byte 4
- jb near FLAC__lpc_restore_signal_asm_ia32.begin
-
- mov edx, [esp + 28]
- movd mm6, [esp + 36]
- mov ebp, esp
-
- and esp, 0xfffffff8
-
- xor ecx, ecx
-.copy_qlp_loop:
- push word [edx + 4 * ecx]
- inc ecx
- cmp ecx, eax
- jnz short .copy_qlp_loop
-
- and ecx, 0x3
- test ecx, ecx
- je short .za_end
- sub ecx, byte 4
-.za_loop:
- push word 0
- inc eax
- inc ecx
- jnz short .za_loop
-.za_end:
-
- movq mm5, [esp + 2 * eax - 8]
- movd mm4, [edi - 16]
- punpckldq mm4, [edi - 12]
- movd mm0, [edi - 8]
- punpckldq mm0, [edi - 4]
- packssdw mm4, mm0
-
- cmp eax, byte 4
- jnbe short .mmx_4more
-
- ALIGN 16
-.mmx_4_loop_i:
- movq mm7, mm4
- pmaddwd mm7, mm5
- movq mm0, mm7
- punpckhdq mm7, mm7
- paddd mm7, mm0
- psrad mm7, mm6
- movd mm1, [esi]
- paddd mm7, mm1
- movd [edi], mm7
- psllq mm7, 48
- psrlq mm4, 16
- por mm4, mm7
-
- add esi, byte 4
- add edi, byte 4
-
- dec ebx
- jnz .mmx_4_loop_i
- jmp .mmx_end
-.mmx_4more:
- shl eax, 2
- neg eax
- add eax, byte 16
- ALIGN 16
-.mmx_4more_loop_i:
- mov ecx, edi
- add ecx, eax
- mov edx, esp
-
- movq mm7, mm4
- pmaddwd mm7, mm5
-
- ALIGN 16
-.mmx_4more_loop_j:
- movd mm0, [ecx - 16]
- punpckldq mm0, [ecx - 12]
- movd mm1, [ecx - 8]
- punpckldq mm1, [ecx - 4]
- packssdw mm0, mm1
- pmaddwd mm0, [edx]
- paddd mm7, mm0
-
- add edx, byte 8
- add ecx, byte 16
- cmp ecx, edi
- jnz .mmx_4more_loop_j
-
- movq mm0, mm7
- punpckhdq mm7, mm7
- paddd mm7, mm0
- psrad mm7, mm6
- movd mm1, [esi]
- paddd mm7, mm1
- movd [edi], mm7
- psllq mm7, 48
- psrlq mm4, 16
- por mm4, mm7
-
- add esi, byte 4
- add edi, byte 4
-
- dec ebx
- jnz short .mmx_4more_loop_i
-.mmx_end:
- emms
- mov esp, ebp
-
-.end:
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
-
-; **********************************************************************
-;
;void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
; {
; unsigned i, j;
@@ -1098,285 +724,4 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
pop ebp
ret
-; **********************************************************************
-;
-; void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
-; {
-; unsigned i, j;
-; FLAC__int64 sum;
-;
-; FLAC__ASSERT(order > 0);
-;
-; for(i = 0; i < data_len; i++) {
-; sum = 0;
-; for(j = 0; j < order; j++)
-; sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1];
-; data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
-; }
-; }
- ALIGN 16
-cident FLAC__lpc_restore_signal_wide_asm_ia32
- ;[esp + 40] data[]
- ;[esp + 36] lp_quantization
- ;[esp + 32] order
- ;[esp + 28] qlp_coeff[]
- ;[esp + 24] data_len
- ;[esp + 20] residual[]
-
- ;ASSERT(order > 0)
- ;ASSERT(order <= 32)
- ;ASSERT(lp_quantization <= 31)
-
- push ebp
- push ebx
- push esi
- push edi
-
- mov ebx, [esp + 24] ; ebx = data_len
- test ebx, ebx
- jz near .end ; do nothing if data_len == 0
-
-.begin:
- mov eax, [esp + 32] ; eax = order
- cmp eax, 1
- jg short .x87_32
-
- mov esi, [esp + 20] ; esi = residual[]
- mov edi, [esp + 40] ; edi = data[]
- mov ecx, [esp + 28] ; ecx = qlp_coeff[]
- mov ebp, [ecx] ; ebp = qlp_coeff[0]
- mov eax, [edi - 4] ; eax = data[-1]
- mov ecx, [esp + 36] ; cl = lp_quantization
- ALIGN 16
-.x87_1_loop_i:
- imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
- shrd eax, edx, cl ; 0 <= lp_quantization <= 15
-;
- add eax, [esi]
- mov [edi], eax
-;
- add esi, 4
- add edi, 4
- dec ebx
- jnz .x87_1_loop_i
- jmp .end
-
-.mov_eip_to_eax:
- mov eax, [esp]
- ret
-
-.x87_32: ; eax = order
- neg eax
- add eax, eax
- lea ebp, [eax + eax * 4 + .jumper_0 - .get_eip0]
- call .mov_eip_to_eax
-.get_eip0:
- add ebp, eax
- inc ebp ; compensate for the shorter opcode on the last iteration
-
- mov ebx, [esp + 28] ; ebx = qlp_coeff[]
- mov edi, [esp + 40] ; edi = data[]
- sub [esp + 20], edi ; residual[] -= data[]
-
- xor ecx, ecx
- xor esi, esi
- jmp ebp
-
-;eax = --
-;edx = --
-;ecx = 0
-;esi = 0
-;
-;ebx = qlp_coeff[]
-;edi = data[]
-;ebp = @address
-
- mov eax, [ebx + 124] ; eax = qlp_coeff[31]
- imul dword [edi - 128] ; edx:eax = qlp_coeff[31] * data[i-32]
- add ecx, eax
- adc esi, edx ; sum += qlp_coeff[31] * data[i-32]
-
- mov eax, [ebx + 120] ; eax = qlp_coeff[30]
- imul dword [edi - 124] ; edx:eax = qlp_coeff[30] * data[i-31]
- add ecx, eax
- adc esi, edx ; sum += qlp_coeff[30] * data[i-31]
-
- mov eax, [ebx + 116]
- imul dword [edi - 120]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 112]
- imul dword [edi - 116]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 108]
- imul dword [edi - 112]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 104]
- imul dword [edi - 108]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 100]
- imul dword [edi - 104]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 96]
- imul dword [edi - 100]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 92]
- imul dword [edi - 96]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 88]
- imul dword [edi - 92]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 84]
- imul dword [edi - 88]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 80]
- imul dword [edi - 84]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 76]
- imul dword [edi - 80]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 72]
- imul dword [edi - 76]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 68]
- imul dword [edi - 72]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 64]
- imul dword [edi - 68]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 60]
- imul dword [edi - 64]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 56]
- imul dword [edi - 60]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 52]
- imul dword [edi - 56]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 48]
- imul dword [edi - 52]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 44]
- imul dword [edi - 48]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 40]
- imul dword [edi - 44]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 36]
- imul dword [edi - 40]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 32]
- imul dword [edi - 36]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 28]
- imul dword [edi - 32]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 24]
- imul dword [edi - 28]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 20]
- imul dword [edi - 24]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 16]
- imul dword [edi - 20]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 12]
- imul dword [edi - 16]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 8]
- imul dword [edi - 12]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 4]
- imul dword [edi - 8]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx] ; eax = qlp_coeff[ 0] (NOTE: one byte missing from instruction)
- imul dword [edi - 4] ; edx:eax = qlp_coeff[ 0] * data[i- 1]
- add ecx, eax
- adc esi, edx ; sum += qlp_coeff[ 0] * data[i- 1]
-
-.jumper_0:
- mov edx, ecx
-;esi:edx = sum
- mov ecx, [esp + 36] ; cl = lp_quantization
- shrd edx, esi, cl ; edx = (sum >> lp_quantization)
-;eax = --
-;ecx = --
-;edx = sum >> lp_q
-;esi = --
-;
- mov eax, [esp + 20] ; residual[] - data[]
- add edx, [edi + eax] ; edx = residual[i] + (sum >> lp_quantization)
- mov [edi], edx ; data[i] = residual[i] + (sum >> lp_quantization)
- add edi, 4
-
- dec dword [esp + 24]
- jz short .end
- xor ecx, ecx
- xor esi, esi
- jmp ebp
-
-.end:
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
; end
diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h
index 0e619c1d..f538e645 100644
--- a/src/libFLAC/include/private/lpc.h
+++ b/src/libFLAC/include/private/lpc.h
@@ -206,22 +206,6 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2(const FLA
*/
void FLAC__lpc_restore_signal(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
-#ifndef FLAC__NO_ASM
-# ifdef FLAC__CPU_IA32
-# ifdef FLAC__HAS_NASM
-void FLAC__lpc_restore_signal_asm_ia32(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
-void FLAC__lpc_restore_signal_asm_ia32_mmx(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
-void FLAC__lpc_restore_signal_wide_asm_ia32(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
-# endif /* FLAC__HAS_NASM */
-# endif /* FLAC__CPU_IA32 */
-# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
-# ifdef FLAC__SSE4_1_SUPPORTED
-void FLAC__lpc_restore_signal_intrin_sse41(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
-void FLAC__lpc_restore_signal_16_intrin_sse41(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
-void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
-# endif
-# endif
-#endif /* FLAC__NO_ASM */
#ifndef FLAC__INTEGER_ONLY_LIBRARY
diff --git a/src/libFLAC/lpc_intrin_sse41.c b/src/libFLAC/lpc_intrin_sse41.c
index c37399be..74e7e956 100644
--- a/src/libFLAC/lpc_intrin_sse41.c
+++ b/src/libFLAC/lpc_intrin_sse41.c
@@ -588,550 +588,6 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FL
}
}
-FLAC__SSE_TARGET("sse4.1")
-void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[])
-{
- int i;
- const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
-
- if (!data_len)
- return;
-
- FLAC__ASSERT(order > 0);
- FLAC__ASSERT(order <= 32);
- FLAC__ASSERT(lp_quantization <= 32); /* there's no _mm_sra_epi64() so we have to use _mm_srl_epi64() */
-
- if(order <= 12) {
- if(order > 8) { /* order == 9, 10, 11, 12 */
- if(order > 10) { /* order == 11, 12 */
- __m128i qlp[6], dat[6];
- __m128i summ, temp;
- qlp[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0))); // 0 q[1] 0 q[0]
- qlp[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2))); // 0 q[3] 0 q[2]
- qlp[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4))); // 0 q[5] 0 q[4]
- qlp[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6))); // 0 q[7] 0 q[6]
- qlp[4] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+8))); // 0 q[9] 0 q[8]
- if (order == 12)
- qlp[5] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+10))); // 0 q[11] 0 q[10]
- else
- qlp[5] = _mm_cvtepu32_epi64(_mm_cvtsi32_si128(qlp_coeff[10])); // 0 0 0 q[10]
-
- dat[5] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-12)), _MM_SHUFFLE(2,0,3,1)); // 0 d[i-12] 0 d[i-11]
- dat[4] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-10)), _MM_SHUFFLE(2,0,3,1)); // 0 d[i-10] 0 d[i-9]
- dat[3] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-8 )), _MM_SHUFFLE(2,0,3,1)); // 0 d[i-8] 0 d[i-7]
- dat[2] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-6 )), _MM_SHUFFLE(2,0,3,1)); // 0 d[i-6] 0 d[i-5]
- dat[1] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-4 )), _MM_SHUFFLE(2,0,3,1)); // 0 d[i-4] 0 d[i-3]
- dat[0] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-2 )), _MM_SHUFFLE(2,0,3,1)); // 0 d[i-2] 0 d[i-1]
-
- summ = _mm_mul_epi32(dat[5], qlp[5]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[4], qlp[4]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); // ?_64 sum_64
- summ = _mm_srl_epi64(summ, cnt); // ?_64 (sum >> lp_quantization)_64 == ?_32 ?_32 ?_32 (sum >> lp_quantization)_32
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[0]), summ); // ? ? ? d[i]
- data[0] = _mm_cvtsi128_si32(temp);
-
- for(i = 1; i < (int)data_len; i++) {
- temp = _mm_slli_si128(temp, 8);
- dat[5] = _mm_alignr_epi8(dat[5], dat[4], 8); // ? d[i-11] ? d[i-10]
- dat[4] = _mm_alignr_epi8(dat[4], dat[3], 8); // ? d[i-9] ? d[i-8]
- dat[3] = _mm_alignr_epi8(dat[3], dat[2], 8); // ? d[i-7] ? d[i-6]
- dat[2] = _mm_alignr_epi8(dat[2], dat[1], 8); // ? d[i-5] ? d[i-4]
- dat[1] = _mm_alignr_epi8(dat[1], dat[0], 8); // ? d[i-3] ? d[i-2]
- dat[0] = _mm_alignr_epi8(dat[0], temp, 8); // ? d[i-1] ? d[i ]
-
- summ = _mm_mul_epi32(dat[5], qlp[5]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[4], qlp[4]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8)); // ?_64 sum_64
- summ = _mm_srl_epi64(summ, cnt); // ?_64 (sum >> lp_quantization)_64 == ?_32 ?_32 ?_32 (sum >> lp_quantization)_32
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ); // ? ? ? d[i]
- data[i] = _mm_cvtsi128_si32(temp);
- }
- }
- else { /* order == 9, 10 */
- __m128i qlp[5], dat[5];
- __m128i summ, temp;
- qlp[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0)));
- qlp[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2)));
- qlp[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4)));
- qlp[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6)));
- if (order == 10)
- qlp[4] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+8)));
- else
- qlp[4] = _mm_cvtepu32_epi64(_mm_cvtsi32_si128(qlp_coeff[8]));
-
- dat[4] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-10)), _MM_SHUFFLE(2,0,3,1));
- dat[3] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-8 )), _MM_SHUFFLE(2,0,3,1));
- dat[2] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-6 )), _MM_SHUFFLE(2,0,3,1));
- dat[1] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-4 )), _MM_SHUFFLE(2,0,3,1));
- dat[0] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-2 )), _MM_SHUFFLE(2,0,3,1));
-
- summ = _mm_mul_epi32(dat[4], qlp[4]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[0]), summ);
- data[0] = _mm_cvtsi128_si32(temp);
-
- for(i = 1; i < (int)data_len; i++) {
- temp = _mm_slli_si128(temp, 8);
- dat[4] = _mm_alignr_epi8(dat[4], dat[3], 8);
- dat[3] = _mm_alignr_epi8(dat[3], dat[2], 8);
- dat[2] = _mm_alignr_epi8(dat[2], dat[1], 8);
- dat[1] = _mm_alignr_epi8(dat[1], dat[0], 8);
- dat[0] = _mm_alignr_epi8(dat[0], temp, 8);
-
- summ = _mm_mul_epi32(dat[4], qlp[4]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
- }
- }
- }
- else if(order > 4) { /* order == 5, 6, 7, 8 */
- if(order > 6) { /* order == 7, 8 */
- __m128i qlp[4], dat[4];
- __m128i summ, temp;
- qlp[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0)));
- qlp[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2)));
- qlp[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4)));
- if (order == 8)
- qlp[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6)));
- else
- qlp[3] = _mm_cvtepu32_epi64(_mm_cvtsi32_si128(qlp_coeff[6]));
-
- dat[3] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-8 )), _MM_SHUFFLE(2,0,3,1));
- dat[2] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-6 )), _MM_SHUFFLE(2,0,3,1));
- dat[1] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-4 )), _MM_SHUFFLE(2,0,3,1));
- dat[0] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-2 )), _MM_SHUFFLE(2,0,3,1));
-
- summ = _mm_mul_epi32(dat[3], qlp[3]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[0]), summ);
- data[0] = _mm_cvtsi128_si32(temp);
-
- for(i = 1; i < (int)data_len; i++) {
- temp = _mm_slli_si128(temp, 8);
- dat[3] = _mm_alignr_epi8(dat[3], dat[2], 8);
- dat[2] = _mm_alignr_epi8(dat[2], dat[1], 8);
- dat[1] = _mm_alignr_epi8(dat[1], dat[0], 8);
- dat[0] = _mm_alignr_epi8(dat[0], temp, 8);
-
- summ = _mm_mul_epi32(dat[3], qlp[3]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
- }
- }
- else { /* order == 5, 6 */
- __m128i qlp[3], dat[3];
- __m128i summ, temp;
- qlp[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0)));
- qlp[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2)));
- if (order == 6)
- qlp[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4)));
- else
- qlp[2] = _mm_cvtepu32_epi64(_mm_cvtsi32_si128(qlp_coeff[4]));
-
- dat[2] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-6 )), _MM_SHUFFLE(2,0,3,1));
- dat[1] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-4 )), _MM_SHUFFLE(2,0,3,1));
- dat[0] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-2 )), _MM_SHUFFLE(2,0,3,1));
-
- summ = _mm_mul_epi32(dat[2], qlp[2]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[0]), summ);
- data[0] = _mm_cvtsi128_si32(temp);
-
- for(i = 1; i < (int)data_len; i++) {
- temp = _mm_slli_si128(temp, 8);
- dat[2] = _mm_alignr_epi8(dat[2], dat[1], 8);
- dat[1] = _mm_alignr_epi8(dat[1], dat[0], 8);
- dat[0] = _mm_alignr_epi8(dat[0], temp, 8);
-
- summ = _mm_mul_epi32(dat[2], qlp[2]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
- }
- }
- }
- else { /* order == 1, 2, 3, 4 */
- if(order > 2) { /* order == 3, 4 */
- __m128i qlp[2], dat[2];
- __m128i summ, temp;
- qlp[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0)));
- if (order == 4)
- qlp[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2)));
- else
- qlp[1] = _mm_cvtepu32_epi64(_mm_cvtsi32_si128(qlp_coeff[2]));
-
- dat[1] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-4 )), _MM_SHUFFLE(2,0,3,1));
- dat[0] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-2 )), _MM_SHUFFLE(2,0,3,1));
-
- summ = _mm_mul_epi32(dat[1], qlp[1]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[0]), summ);
- data[0] = _mm_cvtsi128_si32(temp);
-
- for(i = 1; i < (int)data_len; i++) {
- temp = _mm_slli_si128(temp, 8);
- dat[1] = _mm_alignr_epi8(dat[1], dat[0], 8);
- dat[0] = _mm_alignr_epi8(dat[0], temp, 8);
-
- summ = _mm_mul_epi32(dat[1], qlp[1]) ;
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
- }
- }
- else { /* order == 1, 2 */
- if(order == 2) {
- __m128i qlp0, dat0;
- __m128i summ, temp;
- qlp0 = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff)));
-
- dat0 = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(data-2 )), _MM_SHUFFLE(2,0,3,1));
-
- summ = _mm_mul_epi32(dat0, qlp0);
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[0]), summ);
- data[0] = _mm_cvtsi128_si32(temp);
-
- for(i = 1; i < (int)data_len; i++) {
- dat0 = _mm_alignr_epi8(dat0, _mm_slli_si128(temp, 8), 8);
-
- summ = _mm_mul_epi32(dat0, qlp0);
-
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
- }
- }
- else { /* order == 1 */
- __m128i qlp0;
- __m128i summ, temp;
- qlp0 = _mm_cvtsi32_si128(qlp_coeff[0]);
- temp = _mm_cvtsi32_si128(data[-1]);
-
- summ = _mm_mul_epi32(temp, qlp0);
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[0]), summ);
- data[0] = _mm_cvtsi128_si32(temp);
-
- for(i = 1; i < (int)data_len; i++) {
- summ = _mm_mul_epi32(temp, qlp0);
- summ = _mm_srl_epi64(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
- }
- }
- }
- }
- }
- else { /* order > 12 */
- __m128i qlp[16];
-
- for(i = 0; i < (int)order/2; i++)
- qlp[i] = _mm_shuffle_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+i*2)), _MM_SHUFFLE(2,0,3,1)); // 0 q[2*i] 0 q[2*i+1]
- if(order & 1)
- qlp[i] = _mm_shuffle_epi32(_mm_cvtsi32_si128(qlp_coeff[i*2]), _MM_SHUFFLE(2,0,3,1));
-
- for(i = 0; i < (int)data_len; i++) {
- __m128i summ = _mm_setzero_si128(), dat;
- FLAC__int32 * const datai = &data[i];
-
- switch((order+1) / 2) {
- case 16: /* order == 31, 32 */
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-32)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[15])); /* Falls through. */
- case 15:
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-30)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[14])); /* Falls through. */
- case 14:
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-28)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[13])); /* Falls through. */
- case 13:
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-26)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[12])); /* Falls through. */
- case 12:
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-24)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[11])); /* Falls through. */
- case 11:
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-22)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[10])); /* Falls through. */
- case 10:
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-20)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[9])); /* Falls through. */
- case 9:
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-18)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[8])); /* Falls through. */
- case 8:
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-16)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[7])); /* Falls through. */
- case 7: /* order == 13, 14 */
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-14)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[6]));
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-12)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[5]));
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-10)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[4]));
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-8)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[3]));
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-6)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[2]));
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-4)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[1]));
- dat = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(const void*)(datai-2)));
- summ = _mm_add_epi64(summ, _mm_mul_epi32(dat, qlp[0]));
- }
- summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
- summ = _mm_srl_epi64(summ, cnt);
- summ = _mm_add_epi32(summ, _mm_cvtsi32_si128(residual[i]));
- data[i] = _mm_cvtsi128_si32(summ);
- }
- }
-}
-
-FLAC__SSE_TARGET("sse4.1")
-void FLAC__lpc_restore_signal_intrin_sse41(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[])
-{
- if(order < 8) {
- FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
- return;
- }
-
- FLAC__ASSERT(order >= 8);
- FLAC__ASSERT(order <= 32);
-
- if(order <= 12) {
- int i;
- const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
-
- if(order > 8) /* order == 9, 10, 11, 12 */
- {
- __m128i qlp[3], dat[3];
- __m128i summ, temp;
-
- qlp[0] = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff + 0)); // q[3] q[2] q[1] q[0]
- qlp[1] = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff + 4)); // q[7] q[6] q[5] q[4]
- qlp[2] = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff + 8)); // q[11] q[10] q[9] q[8]
- switch (order)
- {
- case 9:
- qlp[2] = _mm_slli_si128(qlp[2], 12); qlp[2] = _mm_srli_si128(qlp[2], 12); break; // 0 0 0 q[8]
- case 10:
- qlp[2] = _mm_slli_si128(qlp[2], 8); qlp[2] = _mm_srli_si128(qlp[2], 8); break; // 0 0 q[9] q[8]
- case 11:
- qlp[2] = _mm_slli_si128(qlp[2], 4); qlp[2] = _mm_srli_si128(qlp[2], 4); break; // 0 q[10] q[9] q[8]
- }
-
- dat[2] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data - 12)), _MM_SHUFFLE(0, 1, 2, 3)); // d[i-12] d[i-11] d[i-10] d[i-9]
- dat[1] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data - 8)), _MM_SHUFFLE(0, 1, 2, 3)); // d[i-8] d[i-7] d[i-6] d[i-5]
- dat[0] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data - 4)), _MM_SHUFFLE(0, 1, 2, 3)); // d[i-4] d[i-3] d[i-2] d[i-1]
-
- for (i = 0;;) {
- summ = _mm_mullo_epi32(dat[2], qlp[2]);
- summ = _mm_add_epi32(summ, _mm_mullo_epi32(dat[1], qlp[1]));
- summ = _mm_add_epi32(summ, _mm_mullo_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi32(summ, _mm_shuffle_epi32(summ, _MM_SHUFFLE(1,0,3,2)));
- summ = _mm_add_epi32(summ, _mm_shufflelo_epi16(summ, _MM_SHUFFLE(1,0,3,2)));
-
- summ = _mm_sra_epi32(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
-
- if(++i >= (int)data_len) break;
-
- temp = _mm_slli_si128(temp, 12);
- dat[2] = _mm_alignr_epi8(dat[2], dat[1], 12);
- dat[1] = _mm_alignr_epi8(dat[1], dat[0], 12);
- dat[0] = _mm_alignr_epi8(dat[0], temp, 12);
- }
- }
- else /* order == 8 */
- {
- __m128i qlp[2], dat[2];
- __m128i summ, temp;
-
- qlp[0] = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff + 0));
- qlp[1] = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff + 4));
-
- dat[1] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data - 8)), _MM_SHUFFLE(0, 1, 2, 3));
- dat[0] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data - 4)), _MM_SHUFFLE(0, 1, 2, 3));
-
- for (i = 0;;) {
- summ = _mm_add_epi32(_mm_mullo_epi32(dat[1], qlp[1]), _mm_mullo_epi32(dat[0], qlp[0]));
-
- summ = _mm_add_epi32(summ, _mm_shuffle_epi32(summ, _MM_SHUFFLE(1,0,3,2)));
- summ = _mm_add_epi32(summ, _mm_shufflelo_epi16(summ, _MM_SHUFFLE(1,0,3,2)));
-
- summ = _mm_sra_epi32(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
-
- if(++i >= (int)data_len) break;
-
- temp = _mm_slli_si128(temp, 12);
- dat[1] = _mm_alignr_epi8(dat[1], dat[0], 12);
- dat[0] = _mm_alignr_epi8(dat[0], temp, 12);
- }
- }
- }
- else { /* order > 12 */
-#ifdef FLAC__HAS_NASM
- FLAC__lpc_restore_signal_asm_ia32(residual, data_len, qlp_coeff, order, lp_quantization, data);
-#else
- FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
-#endif
- }
-}
-
-FLAC__SSE_TARGET("ssse3")
-void FLAC__lpc_restore_signal_16_intrin_sse41(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[])
-{
- if(order < 8) {
- FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
- return;
- }
-
- FLAC__ASSERT(order >= 8);
- FLAC__ASSERT(order <= 32);
-
- if(order <= 12) {
- int i;
- const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
-
- if(order > 8) /* order == 9, 10, 11, 12 */
- {
- __m128i qlp[2], dat[2];
- __m128i summ, temp;
-
- qlp[0] = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff+0)); // q[3] q[2] q[1] q[0]
- temp = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff+4)); // q[7] q[6] q[5] q[4]
- qlp[1] = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff+8)); // q[11] q[10] q[9] q[8]
- switch(order)
- {
- case 9:
- qlp[1] = _mm_slli_si128(qlp[1], 12); qlp[1] = _mm_srli_si128(qlp[1], 12); break; // 0 0 0 q[8]
- case 10:
- qlp[1] = _mm_slli_si128(qlp[1], 8); qlp[1] = _mm_srli_si128(qlp[1], 8); break; // 0 0 q[9] q[8]
- case 11:
- qlp[1] = _mm_slli_si128(qlp[1], 4); qlp[1] = _mm_srli_si128(qlp[1], 4); break; // 0 q[10] q[9] q[8]
- }
- qlp[0] = _mm_packs_epi32(qlp[0], temp); // q[7] q[6] q[5] q[4] q[3] q[2] q[1] q[0]
- qlp[1] = _mm_packs_epi32(qlp[1], _mm_setzero_si128()); // 0 0 0 0 q[11] q[10] q[9] q[8]
-
- dat[1] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data-12)), _MM_SHUFFLE(0,1,2,3)); // d[i-12] d[i-11] d[i-10] d[i-9]
- temp = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data-8)), _MM_SHUFFLE(0,1,2,3)); // d[i-8] d[i-7] d[i-6] d[i-5]
- dat[0] = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data-4)), _MM_SHUFFLE(0,1,2,3)); // d[i-4] d[i-3] d[i-2] d[i-1]
-
- dat[1] = _mm_packs_epi32(dat[1], _mm_setzero_si128()); // 0 0 0 0 d[i-12] d[i-11] d[i-10] d[i-9]
- dat[0] = _mm_packs_epi32(dat[0], temp); // d[i-8] d[i-7] d[i-6] d[i-5] d[i-4] d[i-3] d[i-2] d[i-1]
-
- for(i = 0;;) {
- summ = _mm_madd_epi16(dat[1], qlp[1]);
- summ = _mm_add_epi32(summ, _mm_madd_epi16(dat[0], qlp[0]));
-
- summ = _mm_add_epi32(summ, _mm_shuffle_epi32(summ, _MM_SHUFFLE(1,0,3,2)));
- summ = _mm_add_epi32(summ, _mm_shufflelo_epi16(summ, _MM_SHUFFLE(1,0,3,2)));
-
- summ = _mm_sra_epi32(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
-
- if(++i >= (int)data_len) break;
-
- temp = _mm_slli_si128(temp, 14);
- dat[1] = _mm_alignr_epi8(dat[1], dat[0], 14); // 0 0 0 d[i-12] d[i-11] d[i-10] d[i-9] d[i-8]
- dat[0] = _mm_alignr_epi8(dat[0], temp, 14); // d[i-7] d[i-6] d[i-5] d[i-4] d[i-3] d[i-2] d[i-1] d[i]
- }
- }
- else /* order == 8 */
- {
- __m128i qlp0, dat0;
- __m128i summ, temp;
-
- qlp0 = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff+0)); // q[3] q[2] q[1] q[0]
- temp = _mm_loadu_si128((const __m128i*)(const void*)(qlp_coeff+4)); // q[7] q[6] q[5] q[4]
- qlp0 = _mm_packs_epi32(qlp0, temp); // q[7] q[6] q[5] q[4] q[3] q[2] q[1] q[0]
-
- temp = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data-8)), _MM_SHUFFLE(0,1,2,3));
- dat0 = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data-4)), _MM_SHUFFLE(0,1,2,3));
- dat0 = _mm_packs_epi32(dat0, temp); // d[i-8] d[i-7] d[i-6] d[i-5] d[i-4] d[i-3] d[i-2] d[i-1]
-
- for(i = 0;;) {
- summ = _mm_madd_epi16(dat0, qlp0);
-
- summ = _mm_add_epi32(summ, _mm_shuffle_epi32(summ, _MM_SHUFFLE(1,0,3,2)));
- summ = _mm_add_epi32(summ, _mm_shufflelo_epi16(summ, _MM_SHUFFLE(1,0,3,2)));
-
- summ = _mm_sra_epi32(summ, cnt);
- temp = _mm_add_epi32(_mm_cvtsi32_si128(residual[i]), summ);
- data[i] = _mm_cvtsi128_si32(temp);
-
- if(++i >= (int)data_len) break;
-
- temp = _mm_slli_si128(temp, 14);
- dat0 = _mm_alignr_epi8(dat0, temp, 14); // d[i-7] d[i-6] d[i-5] d[i-4] d[i-3] d[i-2] d[i-1] d[i]
- }
- }
- }
- else { /* order > 12 */
-#ifdef FLAC__HAS_NASM
- FLAC__lpc_restore_signal_asm_ia32_mmx(residual, data_len, qlp_coeff, order, lp_quantization, data);
-#else
- FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
-#endif
- }
-}
-
#endif /* defined FLAC__CPU_IA32 */
FLAC__SSE_TARGET("sse4.1")
diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c
index eab9e8b4..cc583d7d 100644
--- a/src/libFLAC/stream_decoder.c
+++ b/src/libFLAC/stream_decoder.c
@@ -45,7 +45,6 @@
#include "protected/stream_decoder.h"
#include "private/bitreader.h"
#include "private/bitmath.h"
-#include "private/cpu.h"
#include "private/crc.h"
#include "private/fixed.h"
#include "private/format.h"
@@ -129,12 +128,6 @@ typedef struct FLAC__StreamDecoderPrivate {
FLAC__StreamDecoderWriteCallback write_callback;
FLAC__StreamDecoderMetadataCallback metadata_callback;
FLAC__StreamDecoderErrorCallback error_callback;
- /* generic 32-bit datapath: */
- void (*local_lpc_restore_signal)(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
- /* generic 64-bit datapath: */
- void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
- /* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit): */
- void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 data[]);
void *client_data;
FILE *file; /* only used if FLAC__stream_decoder_init_file()/FLAC__stream_decoder_init_FILE() called, else NULL */
FLAC__BitReader *input;
@@ -152,7 +145,6 @@ typedef struct FLAC__StreamDecoderPrivate {
size_t metadata_filter_ids_count, metadata_filter_ids_capacity; /* units for both are IDs, not bytes */
FLAC__Frame frame;
FLAC__bool cached; /* true if there is a byte in lookahead */
- FLAC__CPUInfo cpuinfo;
FLAC__byte header_warmup[2]; /* contains the sync code and reserved bits */
FLAC__byte lookahead; /* temp storage when we need to look ahead one byte in the stream */
/* unaligned (original) pointers to allocated data */
@@ -373,48 +365,6 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
return decoder->protected_->initstate = FLAC__STREAM_DECODER_INIT_STATUS_ERROR_OPENING_FILE;
#endif
- /*
- * get the CPU info and set the function pointers
- */
- FLAC__cpu_info(&decoder->private_->cpuinfo);
- /* first default to the non-asm routines */
- decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
- decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
- decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
- /* now override with asm where appropriate */
-#ifndef FLAC__NO_ASM
- if(decoder->private_->cpuinfo.use_asm) {
-#ifdef FLAC__CPU_IA32
- FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
-#ifdef FLAC__HAS_NASM
- decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
- if (decoder->private_->cpuinfo.x86.mmx) {
- decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
- decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx;
- }
- else {
- decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
- decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32;
- }
-#endif
-#if FLAC__HAS_X86INTRIN && ! defined FLAC__INTEGER_ONLY_LIBRARY
-# if defined FLAC__SSE4_1_SUPPORTED
- if (decoder->private_->cpuinfo.x86.sse41) {
-# if !defined FLAC__HAS_NASM /* these are not undoubtedly faster than their MMX ASM counterparts */
- decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_intrin_sse41;
- decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse41;
-# endif
- decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
- }
-# endif
-#endif
-#elif defined FLAC__CPU_X86_64
- FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
- /* No useful SSE optimizations yet */
-#endif
- }
-#endif
-
/* from here on, errors are fatal */
if(!FLAC__bitreader_init(decoder->private_->input, read_callback_, decoder)) {
@@ -2848,12 +2798,9 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, uint32_t channel, ui
if(do_full_decode) {
memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order);
if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
- if(bps <= 16 && subframe->qlp_coeff_precision <= 16)
- decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
- else
- decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
+ FLAC__lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
else
- decoder->private_->local_lpc_restore_signal_64bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
+ FLAC__lpc_restore_signal_wide(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
}
return true;
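
Note on the dispatch that remains in read_subframe_lpc_() above: each product qlp_coeff[j] * data[i-j-1] needs roughly bps + qlp_coeff_precision bits, and summing `order` of them can grow the result by up to FLAC__bitmath_ilog2(order) more bits, so the 32-bit routine is only safe when that total stays within 32; otherwise the wide (64-bit accumulator) routine is used. A standalone sketch with hypothetical subframe values:

    #include <stdio.h>

    /* floor(log2(v)) for v > 0, mirroring what FLAC__bitmath_ilog2() computes */
    static unsigned ilog2u(unsigned v)
    {
        unsigned l = 0;
        while(v >>= 1)
            l++;
        return l;
    }

    int main(void)
    {
        unsigned bps = 16, precision = 15, order = 8; /* hypothetical subframe */
        /* 16 + 15 + 3 = 34 > 32, so this subframe takes the wide path */
        if(bps + precision + ilog2u(order) <= 32)
            puts("FLAC__lpc_restore_signal (32-bit accumulator)");
        else
            puts("FLAC__lpc_restore_signal_wide (64-bit accumulator)");
        return 0;
    }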