diff options
author | Josh Coalson <jcoalson@users.sourceforce.net> | 2007-03-22 07:23:14 +0000 |
---|---|---|
committer | Josh Coalson <jcoalson@users.sourceforce.net> | 2007-03-22 07:23:14 +0000 |
commit | 6942044be1cce24e339c92f23ee1e5b7508c36ab (patch) | |
tree | c60d2fa7a365cba5b5f7edb1f061db9ee300186d | |
parent | 645620c24b81ff9cfd048b8fc26ebd91f8506599 (diff) | |
download | flac-6942044be1cce24e339c92f23ee1e5b7508c36ab.tar.gz |
optimization: faster crc-16 calculation
-rw-r--r-- | src/libFLAC/ia32/bitreader_asm.nasm | 78 |
1 file changed, 39 insertions, 39 deletions
```diff
diff --git a/src/libFLAC/ia32/bitreader_asm.nasm b/src/libFLAC/ia32/bitreader_asm.nasm
index 6decd349..5b923aa1 100644
--- a/src/libFLAC/ia32/bitreader_asm.nasm
+++ b/src/libFLAC/ia32/bitreader_asm.nasm
@@ -151,26 +151,26 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
         jnz .c0b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
 .c0b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
         movzx ebx, dl
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
 .c0b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
         movzx ebx, dh
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
         shr edx, 16
 .c0b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
         movzx ebx, dl
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
 .c0b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
         movzx ebx, dh
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
-        mov [ebp + 24], eax ; br->read_crc <- crc
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
+        mov [ebp + 24], ax ; br->read_crc <- crc
         pop ecx
         pop edi
@@ -225,26 +225,26 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
         jnz .c1b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
 .c1b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
         movzx ebx, dl
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
 .c1b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
         movzx ebx, dh
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
         shr edx, 16
 .c1b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
         movzx ebx, dl
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
 .c1b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
         movzx ebx, dh
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
-        mov [ebp + 24], eax ; br->read_crc <- crc
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
+        mov [ebp + 24], ax ; br->read_crc <- crc
         pop ecx
         pop edi
@@ -436,26 +436,26 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
         jnz .c2b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
 .c2b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
         movzx ebx, dl
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
 .c2b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
         movzx ebx, dh
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
         shr edx, 16
 .c2b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
         movzx ebx, dl
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
 .c2b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
         movzx ebx, dh
-        mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
-        shl ax, 8 ; ax <- (crc<<8)
-        xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
-        mov [ebp + 24], eax ; br->read_crc <- crc
+        mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
+        shl eax, 8 ; ax <- (crc<<8)
+        xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
+        mov [ebp + 24], ax ; br->read_crc <- crc
         pop eax
         pop ecx
         pop ebx
```