summaryrefslogtreecommitdiff
path: root/libbb
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2022-01-01 15:42:15 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2022-01-01 15:42:15 +0100
commit 4d4f1f2096f06d69a6f205f0d8e33d4398f25677 (patch)
tree f3ab167117ee36b55d98ddd6cc49eb087de64b0b /libbb
parent d643010feeef312c77d7f51c3dd476d4e605c982 (diff)
download busybox-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.tar.gz
libbb/sha1: x86_64 version: bswap in 64-bit chunks
function                                             old     new   delta
sha1_process_block64                                3562    3570      +8

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- libbb/Config.src | 2
-rw-r--r-- libbb/hash_md5_sha.c | 42
2 files changed, 23 insertions, 21 deletions
diff --git a/libbb/Config.src b/libbb/Config.src
index f66f65f81..42a2283aa 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -59,7 +59,7 @@ config SHA1_SMALL
Trade binary size versus speed for the sha1 algorithm.
throughput MB/s size of sha1_process_block64
value 486 x86-64 486 x86-64
- 0 367 367 3657 3562
+ 0 367 367 3657 3570
1 224 229 654 732
2,3 200 195 358 380
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index a4e36066a..959bfc951 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -867,27 +867,29 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
.endif \n\
.endm \n\
\n\
- movl 4*8(%rdi), %r8d \n\
- bswap %r8d \n\
- movl 4*9(%rdi), %r9d \n\
- bswap %r9d \n\
- movl 4*10(%rdi), %r10d \n\
- bswap %r10d \n\
- movl 4*11(%rdi), %r11d \n\
- bswap %r11d \n\
- movl 4*12(%rdi), %r12d \n\
- bswap %r12d \n\
- movl 4*13(%rdi), %r13d \n\
- bswap %r13d \n\
- movl 4*14(%rdi), %r14d \n\
- bswap %r14d \n\
- movl 4*15(%rdi), %r15d \n\
- bswap %r15d \n\
- movl $7, %eax \n\
+ movq 4*8(%rdi), %r8 \n\
+ bswap %r8 \n\
+ movl %r8d, %r9d \n\
+ shrq $32, %r8 \n\
+ movq 4*10(%rdi), %r10 \n\
+ bswap %r10 \n\
+ movl %r10d, %r11d \n\
+ shrq $32, %r10 \n\
+ movq 4*12(%rdi), %r12 \n\
+ bswap %r12 \n\
+ movl %r12d, %r13d \n\
+ shrq $32, %r12 \n\
+ movq 4*14(%rdi), %r14 \n\
+ bswap %r14 \n\
+ movl %r14d, %r15d \n\
+ shrq $32, %r14 \n\
+ \n\
+ movl $3, %eax \n\
1: \n\
- movl (%rdi,%rax,4), %esi \n\
- bswap %esi \n\
- movl %esi, -32(%rsp,%rax,4) \n\
+ movq (%rdi,%rax,8), %rsi \n\
+ bswap %rsi \n\
+ rolq $32, %rsi \n\
+ movq %rsi, -32(%rsp,%rax,8) \n\
decl %eax \n\
jns 1b \n\
movl 80(%rdi), %eax # a = ctx->hash[0] \n\