diff options
author | Denys Vlasenko | 2021-12-30 18:54:02 +0100 |
---|---|---|
committer | Denys Vlasenko | 2021-12-30 18:54:02 +0100 |
commit | 0b62a08777e29c34f947c791a1eded5b97e05699 (patch) | |
tree | c411bf4bd5f5d2dd6821287696b5866f595134fe /libbb | |
parent | 25aadc893d21b35f7d34a9d1edc843632e7abd8f (diff) | |
download | busybox-0b62a08777e29c34f947c791a1eded5b97e05699.zip busybox-0b62a08777e29c34f947c791a1eded5b97e05699.tar.gz |
libbb/sha1: add config-selectable partially unrolled version
function old new delta
sha1_process_block64 364 732 +368
static.rconsts 16 - -16
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 1/0 up/down: 368/-16) Total: 352 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- | libbb/Config.src | 3 |
-rw-r--r-- | libbb/hash_md5_sha.c | 100 |
2 files changed, 98 insertions, 5 deletions
diff --git a/libbb/Config.src b/libbb/Config.src index 13188ef..c793f59 100644 --- a/libbb/Config.src +++ b/libbb/Config.src @@ -60,7 +60,8 @@ config SHA1_SMALL throughput MB/s size of sha1_process_block64 value 486 x86-64 486 x86-64 0 339 374 4149 4167 - 1,2,3 200 195 358 380 + 1 224 229 654 732 + 2,3 200 195 358 380 config SHA3_SMALL int "SHA3: Trade bytes for speed (0:fast, 1:slow)" diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 75673e3..053ebe2 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -514,9 +514,9 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) do { \ uint32_t work = EXPR(B, C, D); \ if (n <= 15) \ - work += W[n & 0xf] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \ + work += W[n & 15] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \ if (n >= 16) \ - work += W[n & 0xf] = rotl32(W[(n+13) & 0xf] ^ W[(n+8) & 0xf] ^ W[(n+2) & 0xf] ^ W[n & 0xf], 1); \ + work += W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1); \ E += work + rotl32(A, 5) + rconsts[n / 20]; \ B = rotl32(B, 30); \ } while (0) @@ -549,9 +549,101 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) ctx->hash[3] += d; ctx->hash[4] += e; } -#else -/* TODO: for CONFIG_SHA1_SMALL == 1, have a partially unrolled version? */ +#elif CONFIG_SHA1_SMALL == 1 +/* Middle-sized version, +300 bytes of code on x86. 
*/ +static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) +{ + static const uint32_t rconsts[] ALIGN4 = { + 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 + }; + int j; + int n; + uint32_t W[16+16]; + uint32_t a, b, c, d, e; + + a = ctx->hash[0]; + b = ctx->hash[1]; + c = ctx->hash[2]; + d = ctx->hash[3]; + e = ctx->hash[4]; + + /* 1st round of 20 operations */ + n = 0; + do { + uint32_t work = ((c ^ d) & b) ^ d; + W[n] = W[n+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); + work += W[n]; + work += e + rotl32(a, 5) + rconsts[0]; + /* Rotate by one for next time */ + e = d; + d = c; + c = rotl32(b, 30); + b = a; + a = work; + n = (n + 1) & 15; + } while (n != 0); + do { + uint32_t work = ((c ^ d) & b) ^ d; + W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1); + work += W[n]; + work += e + rotl32(a, 5) + rconsts[0]; + e = d; + d = c; + c = rotl32(b, 30); + b = a; + a = work; + n = (n + 1) & 15; + } while (n != 4); + /* 2nd round of 20 operations */ + j = 19; + do { + uint32_t work = c ^ d ^ b; + W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1); + work += W[n]; + work += e + rotl32(a, 5) + rconsts[1]; + e = d; + d = c; + c = rotl32(b, 30); + b = a; + a = work; + n = (n + 1) & 15; + } while (--j >= 0); + /* 3rd round */ + j = 19; + do { + uint32_t work = ((b | c) & d) | (b & c); + W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1); + work += W[n]; + work += e + rotl32(a, 5) + rconsts[2]; + e = d; + d = c; + c = rotl32(b, 30); + b = a; + a = work; + n = (n + 1) & 15; + } while (--j >= 0); + /* 4th round */ + j = 19; + do { + uint32_t work = c ^ d ^ b; + W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1); + work += W[n]; + work += e + rotl32(a, 5) + rconsts[3]; + e = d; + d = c; + c = rotl32(b, 30); + b = a; + a = work; + n = (n + 1) & 15; + } while (--j >= 0); + ctx->hash[0] += a; + ctx->hash[1] += b; + ctx->hash[2] += c; + ctx->hash[3] += d; + ctx->hash[4] += e; +} +#else /* Compact version, almost twice as slow as fully 
unrolled */ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) { |