-rw-r--r--   libbb/Config.src        3
-rw-r--r--   libbb/hash_md5_sha.c  100
2 files changed, 98 insertions, 5 deletions
diff --git a/libbb/Config.src b/libbb/Config.src
index 13188ef..c793f59 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -60,7 +60,8 @@ config SHA1_SMALL
throughput MB/s          size of sha1_process_block64
value     486   x86-64   486    x86-64
0         339   374      4149   4167
- 1,2,3   200   195      358    380
+ 1       224   229      654    732
+ 2,3     200   195      358    380
config SHA3_SMALL
int "SHA3: Trade bytes for speed (0:fast, 1:slow)"
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 75673e3..053ebe2 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -514,9 +514,9 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
do { \
uint32_t work = EXPR(B, C, D); \
if (n <= 15) \
- work += W[n & 0xf] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \
+ work += W[n & 15] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \
if (n >= 16) \
- work += W[n & 0xf] = rotl32(W[(n+13) & 0xf] ^ W[(n+8) & 0xf] ^ W[(n+2) & 0xf] ^ W[n & 0xf], 1); \
+ work += W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1); \
E += work + rotl32(A, 5) + rconsts[n / 20]; \
B = rotl32(B, 30); \
} while (0)
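The 0xf -> 15 rewrite in this hunk only changes spelling, but the masked indexing it touches is worth unpacking: the unrolled variant keeps a 16-word sliding window of the SHA-1 message schedule, and for n >= 16 the offsets (n+13), (n+8) and (n+2) are just the mod-16 images of the textbook n-3, n-8 and n-14. A minimal standalone check of that equivalence (rotl32_demo and sha1_schedule_demo are names invented for this sketch, not part of the commit; "in" stands for the 64-byte block as 16 already byte-swapped words):

#include <assert.h>
#include <stdint.h>

static uint32_t rotl32_demo(uint32_t x, unsigned n)
{
	return (x << n) | (x >> (32 - n));
}

static void sha1_schedule_demo(const uint32_t in[16])
{
	uint32_t full[80]; /* textbook 80-word schedule */
	uint32_t win[16];  /* 16-word window, as in the macro above */
	int t;

	for (t = 0; t < 80; t++) {
		if (t < 16)
			full[t] = win[t & 15] = in[t];
		else {
			full[t] = rotl32_demo(full[t-3] ^ full[t-8] ^ full[t-14] ^ full[t-16], 1);
			win[t & 15] = rotl32_demo(win[(t+13) & 15] ^ win[(t+8) & 15]
						^ win[(t+2) & 15] ^ win[t & 15], 1);
		}
		assert(full[t] == win[t & 15]); /* both forms agree at every step */
	}
}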
@@ -549,9 +549,101 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
ctx->hash[3] += d;
ctx->hash[4] += e;
}
-#else
-/* TODO: for CONFIG_SHA1_SMALL == 1, have a partially unrolled version? */
+#elif CONFIG_SHA1_SMALL == 1
+/* Middle-sized version, +300 bytes of code on x86. */
+static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
+{
+ static const uint32_t rconsts[] ALIGN4 = {
+ 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
+ };
+ int j;
+ int n;
+ uint32_t W[16+16];
+ uint32_t a, b, c, d, e;
+
+ a = ctx->hash[0];
+ b = ctx->hash[1];
+ c = ctx->hash[2];
+ d = ctx->hash[3];
+ e = ctx->hash[4];
+
+ /* 1st round of 20 operations */
+ n = 0;
+ do {
+ uint32_t work = ((c ^ d) & b) ^ d;
+ W[n] = W[n+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]);
+ work += W[n];
+ work += e + rotl32(a, 5) + rconsts[0];
+ /* Rotate by one for next time */
+ e = d;
+ d = c;
+ c = rotl32(b, 30);
+ b = a;
+ a = work;
+ n = (n + 1) & 15;
+ } while (n != 0);
+ do {
+ uint32_t work = ((c ^ d) & b) ^ d;
+ W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
+ work += W[n];
+ work += e + rotl32(a, 5) + rconsts[0];
+ e = d;
+ d = c;
+ c = rotl32(b, 30);
+ b = a;
+ a = work;
+ n = (n + 1) & 15;
+ } while (n != 4);
+ /* 2nd round of 20 operations */
+ j = 19;
+ do {
+ uint32_t work = c ^ d ^ b;
+ W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
+ work += W[n];
+ work += e + rotl32(a, 5) + rconsts[1];
+ e = d;
+ d = c;
+ c = rotl32(b, 30);
+ b = a;
+ a = work;
+ n = (n + 1) & 15;
+ } while (--j >= 0);
+ /* 3rd round */
+ j = 19;
+ do {
+ uint32_t work = ((b | c) & d) | (b & c);
+ W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
+ work += W[n];
+ work += e + rotl32(a, 5) + rconsts[2];
+ e = d;
+ d = c;
+ c = rotl32(b, 30);
+ b = a;
+ a = work;
+ n = (n + 1) & 15;
+ } while (--j >= 0);
+ /* 4th round */
+ j = 19;
+ do {
+ uint32_t work = c ^ d ^ b;
+ W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
+ work += W[n];
+ work += e + rotl32(a, 5) + rconsts[3];
+ e = d;
+ d = c;
+ c = rotl32(b, 30);
+ b = a;
+ a = work;
+ n = (n + 1) & 15;
+ } while (--j >= 0);
+ ctx->hash[0] += a;
+ ctx->hash[1] += b;
+ ctx->hash[2] += c;
+ ctx->hash[3] += d;
+ ctx->hash[4] += e;
+}
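Two details of the new middle-sized variant are easy to miss when reading the patch. The first 20-operation round is split into a 16-iteration loop, which byte-swaps the input block into W[] while consuming it, and a 4-iteration loop that already uses the expansion, because only the first 16 of the 80 operations read raw input words. And W[] is declared as W[16+16] with every store mirrored (W[n] = W[n+16] = ...), so the reads at offsets +2, +8 and +13 need no '& 15' mask: n stays in 0..15, the highest index read is n+13 = 28, and the high copy always holds the same value as the low one. A tiny illustration of that mirroring (w_demo is an invented name, not code from the commit):

#include <stdint.h>

/* Mirrored store keeps W[n] and W[n+16] identical, so an unmasked read
 * up to 13 slots past n (index <= 28 < 32) stays in bounds and sees the
 * same data as the low half of the buffer. */
static uint32_t w_demo(uint32_t W[16 + 16], int n, uint32_t v)
{
	W[n] = W[n + 16] = v;                    /* n is always 0..15 */
	return W[n + 2] ^ W[n + 8] ^ W[n + 13];  /* unmasked loads, in bounds */
}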
+#else
/* Compact version, almost twice as slow as fully unrolled */
static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
{