diff options
author | Denys Vlasenko | 2021-12-31 17:06:00 +0100 |
---|---|---|
committer | Denys Vlasenko | 2021-12-31 17:07:47 +0100 |
commit | f09d088fdf6eeeba902fb5627930145a3058a5f0 (patch) | |
tree | 6c3b17c675f4860babf27dd7f4056921fbad9896 | |
parent | 0b62a08777e29c34f947c791a1eded5b97e05699 (diff) | |
download | busybox-f09d088fdf6eeeba902fb5627930145a3058a5f0.zip busybox-f09d088fdf6eeeba902fb5627930145a3058a5f0.tar.gz |
libbb/sha1: shrink and speed up fully unrolled version
function old new delta
sha1_process_block64 4149 3950 -199
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/Config.src | 2 | ||||
-rw-r--r-- | libbb/hash_md5_sha.c | 22 |
2 files changed, 23 insertions, 1 deletion
diff --git a/libbb/Config.src b/libbb/Config.src
index c793f59..d2054dc 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -59,7 +59,7 @@ config SHA1_SMALL
 	Trade binary size versus speed for the sha1 algorithm.
 	                throughput MB/s   size of sha1_process_block64
 	value           486  x86-64       486   x86-64
-	0               339  374          4149  4167
+	0               360  374          3950  4167
 	1               224  229          654   732
 	2,3             200  195          358   380

diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 053ebe2..faf485d 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -509,6 +509,27 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 	d = ctx->hash[3];
 	e = ctx->hash[4];

+/* From kernel source comments:
+ * """
+ * If you have 32 registers or more, the compiler can (and should)
+ * try to change the array[] accesses into registers. However, on
+ * machines with less than ~25 registers, that won't really work,
+ * and at least gcc will make an unholy mess of it.
+ *
+ * So to avoid that mess which just slows things down, we force
+ * the stores to memory to actually happen (we might be better off
+ * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
+ * suggested by Artur Skawina - that will also make gcc unable to
+ * try to do the silly "optimize away loads" part because it won't
+ * see what the value will be).
+ * """
+ */
+#if defined(__i386__)
+# define DO_NOT_TRY_PROPAGATING(m) asm("":"+m"(m))
+#else
+# define DO_NOT_TRY_PROPAGATING(m) ((void)0)
+#endif
+
 #undef OP
 #define OP(A,B,C,D,E, n) \
 	do { \
@@ -517,6 +538,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 			work += W[n & 15] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \
 		if (n >= 16) \
 			work += W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1); \
+		DO_NOT_TRY_PROPAGATING(W[n & 15]); \
 		E += work + rotl32(A, 5) + rconsts[n / 20]; \
 		B = rotl32(B, 30); \
 	} while (0)