diff options
author | Denys Vlasenko | 2022-01-01 15:42:15 +0100 |
---|---|---|
committer | Denys Vlasenko | 2022-01-01 15:42:15 +0100 |
commit | 4d4f1f2096f06d69a6f205f0d8e33d4398f25677 (patch) | |
tree | f3ab167117ee36b55d98ddd6cc49eb087de64b0b /libbb | |
parent | d643010feeef312c77d7f51c3dd476d4e605c982 (diff) | |
download | busybox-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.zip busybox-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.tar.gz |
libbb/sha1: x86_64 version: bswap in 64-bit chunks
function old new delta
sha1_process_block64 3562 3570 +8
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- | libbb/Config.src | 2 | ||||
-rw-r--r-- | libbb/hash_md5_sha.c | 42 |
2 files changed, 23 insertions, 21 deletions
```diff
diff --git a/libbb/Config.src b/libbb/Config.src
index f66f65f..42a2283 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -59,7 +59,7 @@ config SHA1_SMALL
 Trade binary size versus speed for the sha1 algorithm.
 throughput MB/s size of sha1_process_block64
 value 486 x86-64 486 x86-64
- 0 367 367 3657 3562
+ 0 367 367 3657 3570
 1 224 229 654 732
 2,3 200 195 358 380
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index a4e3606..959bfc9 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -867,27 +867,29 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
 .endif \n\
 .endm \n\
 \n\
- movl 4*8(%rdi), %r8d \n\
- bswap %r8d \n\
- movl 4*9(%rdi), %r9d \n\
- bswap %r9d \n\
- movl 4*10(%rdi), %r10d \n\
- bswap %r10d \n\
- movl 4*11(%rdi), %r11d \n\
- bswap %r11d \n\
- movl 4*12(%rdi), %r12d \n\
- bswap %r12d \n\
- movl 4*13(%rdi), %r13d \n\
- bswap %r13d \n\
- movl 4*14(%rdi), %r14d \n\
- bswap %r14d \n\
- movl 4*15(%rdi), %r15d \n\
- bswap %r15d \n\
- movl $7, %eax \n\
+ movq 4*8(%rdi), %r8 \n\
+ bswap %r8 \n\
+ movl %r8d, %r9d \n\
+ shrq $32, %r8 \n\
+ movq 4*10(%rdi), %r10 \n\
+ bswap %r10 \n\
+ movl %r10d, %r11d \n\
+ shrq $32, %r10 \n\
+ movq 4*12(%rdi), %r12 \n\
+ bswap %r12 \n\
+ movl %r12d, %r13d \n\
+ shrq $32, %r12 \n\
+ movq 4*14(%rdi), %r14 \n\
+ bswap %r14 \n\
+ movl %r14d, %r15d \n\
+ shrq $32, %r14 \n\
+ \n\
+ movl $3, %eax \n\
 1: \n\
- movl (%rdi,%rax,4), %esi \n\
- bswap %esi \n\
- movl %esi, -32(%rsp,%rax,4) \n\
+ movq (%rdi,%rax,8), %rsi \n\
+ bswap %rsi \n\
+ rolq $32, %rsi \n\
+ movq %rsi, -32(%rsp,%rax,8) \n\
 decl %eax \n\
 jns 1b \n\
 movl 80(%rdi), %eax # a = ctx->hash[0] \n\
```