From 4d4f1f2096f06d69a6f205f0d8e33d4398f25677 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 1 Jan 2022 15:42:15 +0100 Subject: libbb/sha1: x86_64 version: bswap in 64-bit chunks function old new delta sha1_process_block64 3562 3570 +8 Signed-off-by: Denys Vlasenko --- libbb/Config.src | 2 +- libbb/hash_md5_sha.c | 42 ++++++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/libbb/Config.src b/libbb/Config.src index f66f65f..42a2283 100644 --- a/libbb/Config.src +++ b/libbb/Config.src @@ -59,7 +59,7 @@ config SHA1_SMALL Trade binary size versus speed for the sha1 algorithm. throughput MB/s size of sha1_process_block64 value 486 x86-64 486 x86-64 - 0 367 367 3657 3562 + 0 367 367 3657 3570 1 224 229 654 732 2,3 200 195 358 380 diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index a4e3606..959bfc9 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -867,27 +867,29 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) .endif \n\ .endm \n\ \n\ - movl 4*8(%rdi), %r8d \n\ - bswap %r8d \n\ - movl 4*9(%rdi), %r9d \n\ - bswap %r9d \n\ - movl 4*10(%rdi), %r10d \n\ - bswap %r10d \n\ - movl 4*11(%rdi), %r11d \n\ - bswap %r11d \n\ - movl 4*12(%rdi), %r12d \n\ - bswap %r12d \n\ - movl 4*13(%rdi), %r13d \n\ - bswap %r13d \n\ - movl 4*14(%rdi), %r14d \n\ - bswap %r14d \n\ - movl 4*15(%rdi), %r15d \n\ - bswap %r15d \n\ - movl $7, %eax \n\ + movq 4*8(%rdi), %r8 \n\ + bswap %r8 \n\ + movl %r8d, %r9d \n\ + shrq $32, %r8 \n\ + movq 4*10(%rdi), %r10 \n\ + bswap %r10 \n\ + movl %r10d, %r11d \n\ + shrq $32, %r10 \n\ + movq 4*12(%rdi), %r12 \n\ + bswap %r12 \n\ + movl %r12d, %r13d \n\ + shrq $32, %r12 \n\ + movq 4*14(%rdi), %r14 \n\ + bswap %r14 \n\ + movl %r14d, %r15d \n\ + shrq $32, %r14 \n\ + \n\ + movl $3, %eax \n\ 1: \n\ - movl (%rdi,%rax,4), %esi \n\ - bswap %esi \n\ - movl %esi, -32(%rsp,%rax,4) \n\ + movq (%rdi,%rax,8), %rsi \n\ + bswap %rsi \n\ + rolq $32, %rsi \n\ + movq %rsi, -32(%rsp,%rax,8) \n\ decl %eax \n\ jns 1b \n\ movl 80(%rdi), %eax # a = ctx->hash[0] \n\ -- cgit v1.1