From db15cb72e2dad0e2dbed77665ac848a49b950038 Mon Sep 17 00:00:00 2001 From: Eric Andersen Date: Fri, 29 Jun 2001 20:44:51 +0000 Subject: A really nice patch from Manuel Novoa III for compile time configurable size/speed tradeoffs. --- coreutils/md5sum.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++ md5sum.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 372 insertions(+) diff --git a/coreutils/md5sum.c b/coreutils/md5sum.c index dcb05c1..643f827 100644 --- a/coreutils/md5sum.c +++ b/coreutils/md5sum.c @@ -20,6 +20,24 @@ /* Written by Ulrich Drepper */ /* Hacked to work with BusyBox by Alfred M. Szmidt */ +/* + * June 29, 2001 Manuel Novoa III + * + * Added MD5SUM_SIZE_VS_SPEED configuration option. + * + * Current valid values, with data from my system for comparison, are: + * (using uClibc and running on linux-2.4.4.tar.bz2) + * user times (sec) text size (386) + * 0 (fastest) 1.1 6144 + * 1 1.4 5392 + * 2 3.0 5088 + * 3 (smallest) 5.1 4912 + */ + +#define MD5SUM_SIZE_VS_SPEED 2 + +/**********************************************************************/ + #include #include #include @@ -184,9 +202,11 @@ extern void *md5_buffer __P ((const char *buffer, size_t len, void *resblock)); +#if MD5SUM_SIZE_VS_SPEED == 0 /* This array contains the bytes used to pad the buffer to the next 64-byte boundary. (RFC 1321, 3.1: Step 1) */ static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; +#endif /* Initialize structure containing state of computation. (RFC 1321, 3.3: Step 3) */ @@ -233,7 +253,12 @@ void *md5_finish_ctx(struct md5_ctx *ctx, void *resbuf) ++ctx->total[1]; pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes; +#if MD5SUM_SIZE_VS_SPEED > 0 + memset(&ctx->buffer[bytes], 0, pad); + ctx->buffer[bytes] = 0x80; +#else memcpy(&ctx->buffer[bytes], fillbuf, pad); +#endif /* Put the 64-bit file length in *bits* at the end of the buffer. */ *(md5_uint32 *) & ctx->buffer[bytes + pad] = SWAP(ctx->total[0] << 3); @@ -369,6 +394,49 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) const md5_uint32 *words = buffer; size_t nwords = len / sizeof(md5_uint32); const md5_uint32 *endp = words + nwords; +#if MD5SUM_SIZE_VS_SPEED > 0 + static const md5_uint32 C_array[] = { + /* round 1 */ + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + /* round 2 */ + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + /* round 3 */ + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + /* round 4 */ + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 + }; + + static const char P_array[] = { +#if MD5SUM_SIZE_VS_SPEED > 1 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */ +#endif + 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */ + 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */ + 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */ + }; + +#if MD5SUM_SIZE_VS_SPEED > 1 + static const char S_array[] = { + 7, 12, 17, 22, + 5, 9, 14, 20, + 4, 11, 16, 23, + 6, 10, 15, 21 + }; +#endif +#endif + md5_uint32 A = ctx->A; md5_uint32 B = ctx->B; md5_uint32 C = ctx->C; @@ -390,6 +458,79 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) md5_uint32 C_save = C; md5_uint32 D_save = D; +#if MD5SUM_SIZE_VS_SPEED > 1 +#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s))) + + const md5_uint32 *pc; + const char *pp; + const char *ps; + int i; + md5_uint32 temp; + + for ( i=0 ; i < 16 ; i++ ) { + cwp[i] = SWAP(words[i]); + } + words += 16; + +#if MD5SUM_SIZE_VS_SPEED > 2 + pc = C_array; pp = P_array; ps = S_array - 4; + + for ( i = 0 ; i < 64 ; i++ ) { + if ((i&0x0f) == 0) ps += 4; + temp = A; + switch (i>>4) { + case 0: + temp += FF(B,C,D); + break; + case 1: + temp += FG(B,C,D); + break; + case 2: + temp += FH(B,C,D); + break; + case 3: + temp += FI(B,C,D); + break; + } + temp += cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } +#else + pc = C_array; pp = P_array; ps = S_array; + + for ( i = 0 ; i < 16 ; i++ ) { + temp = A + FF(B,C,D) + cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } + + ps += 4; + for ( i = 0 ; i < 16 ; i++ ) { + temp = A + FG(B,C,D) + cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } + ps += 4; + for ( i = 0 ; i < 16 ; i++ ) { + temp = A + FH(B,C,D) + cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } + ps += 4; + for ( i = 0 ; i < 16 ; i++ ) { + temp = A + FI(B,C,D) + cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } + +#endif +#else /* First round: using the given function, the context and a constant the next context is computed. Because the algorithms processing unit is a 32-bit word and it is determined to work on words in @@ -417,7 +558,22 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64 */ +#if MD5SUM_SIZE_VS_SPEED == 1 + const md5_uint32 *pc; + const char *pp; + int i; +#endif + /* Round 1. */ +#if MD5SUM_SIZE_VS_SPEED == 1 + pc = C_array; + for ( i=0 ; i < 4 ; i++ ) { + OP(A, B, C, D, 7, *pc++); + OP(D, A, B, C, 12, *pc++); + OP(C, D, A, B, 17, *pc++); + OP(B, C, D, A, 22, *pc++); + } +#else OP(A, B, C, D, 7, 0xd76aa478); OP(D, A, B, C, 12, 0xe8c7b756); OP(C, D, A, B, 17, 0x242070db); @@ -434,6 +590,7 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) OP(D, A, B, C, 12, 0xfd987193); OP(C, D, A, B, 17, 0xa679438e); OP(B, C, D, A, 22, 0x49b40821); +#endif /* For the second to fourth round we have the possibly swapped words in CORRECT_WORDS. Redefine the macro to take an additional first @@ -449,6 +606,15 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) while (0) /* Round 2. */ +#if MD5SUM_SIZE_VS_SPEED == 1 + pp = P_array; + for ( i=0 ; i < 4 ; i++ ) { + OP(FG, A, B, C, D, (int)(*pp++), 5, *pc++); + OP(FG, D, A, B, C, (int)(*pp++), 9, *pc++); + OP(FG, C, D, A, B, (int)(*pp++), 14, *pc++); + OP(FG, B, C, D, A, (int)(*pp++), 20, *pc++); + } +#else OP(FG, A, B, C, D, 1, 5, 0xf61e2562); OP(FG, D, A, B, C, 6, 9, 0xc040b340); OP(FG, C, D, A, B, 11, 14, 0x265e5a51); @@ -465,8 +631,17 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8); OP(FG, C, D, A, B, 7, 14, 0x676f02d9); OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a); +#endif /* Round 3. */ +#if MD5SUM_SIZE_VS_SPEED == 1 + for ( i=0 ; i < 4 ; i++ ) { + OP(FH, A, B, C, D, (int)(*pp++), 4, *pc++); + OP(FH, D, A, B, C, (int)(*pp++), 11, *pc++); + OP(FH, C, D, A, B, (int)(*pp++), 16, *pc++); + OP(FH, B, C, D, A, (int)(*pp++), 23, *pc++); + } +#else OP(FH, A, B, C, D, 5, 4, 0xfffa3942); OP(FH, D, A, B, C, 8, 11, 0x8771f681); OP(FH, C, D, A, B, 11, 16, 0x6d9d6122); @@ -483,8 +658,17 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) OP(FH, D, A, B, C, 12, 11, 0xe6db99e5); OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8); OP(FH, B, C, D, A, 2, 23, 0xc4ac5665); +#endif /* Round 4. */ +#if MD5SUM_SIZE_VS_SPEED == 1 + for ( i=0 ; i < 4 ; i++ ) { + OP(FI, A, B, C, D, (int)(*pp++), 6, *pc++); + OP(FI, D, A, B, C, (int)(*pp++), 10, *pc++); + OP(FI, C, D, A, B, (int)(*pp++), 15, *pc++); + OP(FI, B, C, D, A, (int)(*pp++), 21, *pc++); + } +#else OP(FI, A, B, C, D, 0, 6, 0xf4292244); OP(FI, D, A, B, C, 7, 10, 0x432aff97); OP(FI, C, D, A, B, 14, 15, 0xab9423a7); @@ -501,6 +685,8 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) OP(FI, D, A, B, C, 11, 10, 0xbd3af235); OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb); OP(FI, B, C, D, A, 9, 21, 0xeb86d391); +#endif +#endif /* Add the starting values of the context. */ A += A_save; diff --git a/md5sum.c b/md5sum.c index dcb05c1..643f827 100644 --- a/md5sum.c +++ b/md5sum.c @@ -20,6 +20,24 @@ /* Written by Ulrich Drepper */ /* Hacked to work with BusyBox by Alfred M. Szmidt */ +/* + * June 29, 2001 Manuel Novoa III + * + * Added MD5SUM_SIZE_VS_SPEED configuration option. + * + * Current valid values, with data from my system for comparison, are: + * (using uClibc and running on linux-2.4.4.tar.bz2) + * user times (sec) text size (386) + * 0 (fastest) 1.1 6144 + * 1 1.4 5392 + * 2 3.0 5088 + * 3 (smallest) 5.1 4912 + */ + +#define MD5SUM_SIZE_VS_SPEED 2 + +/**********************************************************************/ + #include #include #include @@ -184,9 +202,11 @@ extern void *md5_buffer __P ((const char *buffer, size_t len, void *resblock)); +#if MD5SUM_SIZE_VS_SPEED == 0 /* This array contains the bytes used to pad the buffer to the next 64-byte boundary. (RFC 1321, 3.1: Step 1) */ static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; +#endif /* Initialize structure containing state of computation. (RFC 1321, 3.3: Step 3) */ @@ -233,7 +253,12 @@ void *md5_finish_ctx(struct md5_ctx *ctx, void *resbuf) ++ctx->total[1]; pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes; +#if MD5SUM_SIZE_VS_SPEED > 0 + memset(&ctx->buffer[bytes], 0, pad); + ctx->buffer[bytes] = 0x80; +#else memcpy(&ctx->buffer[bytes], fillbuf, pad); +#endif /* Put the 64-bit file length in *bits* at the end of the buffer. */ *(md5_uint32 *) & ctx->buffer[bytes + pad] = SWAP(ctx->total[0] << 3); @@ -369,6 +394,49 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) const md5_uint32 *words = buffer; size_t nwords = len / sizeof(md5_uint32); const md5_uint32 *endp = words + nwords; +#if MD5SUM_SIZE_VS_SPEED > 0 + static const md5_uint32 C_array[] = { + /* round 1 */ + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + /* round 2 */ + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + /* round 3 */ + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + /* round 4 */ + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 + }; + + static const char P_array[] = { +#if MD5SUM_SIZE_VS_SPEED > 1 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */ +#endif + 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */ + 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */ + 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */ + }; + +#if MD5SUM_SIZE_VS_SPEED > 1 + static const char S_array[] = { + 7, 12, 17, 22, + 5, 9, 14, 20, + 4, 11, 16, 23, + 6, 10, 15, 21 + }; +#endif +#endif + md5_uint32 A = ctx->A; md5_uint32 B = ctx->B; md5_uint32 C = ctx->C; @@ -390,6 +458,79 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) md5_uint32 C_save = C; md5_uint32 D_save = D; +#if MD5SUM_SIZE_VS_SPEED > 1 +#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s))) + + const md5_uint32 *pc; + const char *pp; + const char *ps; + int i; + md5_uint32 temp; + + for ( i=0 ; i < 16 ; i++ ) { + cwp[i] = SWAP(words[i]); + } + words += 16; + +#if MD5SUM_SIZE_VS_SPEED > 2 + pc = C_array; pp = P_array; ps = S_array - 4; + + for ( i = 0 ; i < 64 ; i++ ) { + if ((i&0x0f) == 0) ps += 4; + temp = A; + switch (i>>4) { + case 0: + temp += FF(B,C,D); + break; + case 1: + temp += FG(B,C,D); + break; + case 2: + temp += FH(B,C,D); + break; + case 3: + temp += FI(B,C,D); + break; + } + temp += cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } +#else + pc = C_array; pp = P_array; ps = S_array; + + for ( i = 0 ; i < 16 ; i++ ) { + temp = A + FF(B,C,D) + cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } + + ps += 4; + for ( i = 0 ; i < 16 ; i++ ) { + temp = A + FG(B,C,D) + cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } + ps += 4; + for ( i = 0 ; i < 16 ; i++ ) { + temp = A + FH(B,C,D) + cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } + ps += 4; + for ( i = 0 ; i < 16 ; i++ ) { + temp = A + FI(B,C,D) + cwp[(int)(*pp++)] + *pc++; + temp = CYCLIC (temp, ps[i&3]); + temp += B; + A = D; D = C; C = B; B = temp; + } + +#endif +#else /* First round: using the given function, the context and a constant the next context is computed. Because the algorithms processing unit is a 32-bit word and it is determined to work on words in @@ -417,7 +558,22 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64 */ +#if MD5SUM_SIZE_VS_SPEED == 1 + const md5_uint32 *pc; + const char *pp; + int i; +#endif + /* Round 1. */ +#if MD5SUM_SIZE_VS_SPEED == 1 + pc = C_array; + for ( i=0 ; i < 4 ; i++ ) { + OP(A, B, C, D, 7, *pc++); + OP(D, A, B, C, 12, *pc++); + OP(C, D, A, B, 17, *pc++); + OP(B, C, D, A, 22, *pc++); + } +#else OP(A, B, C, D, 7, 0xd76aa478); OP(D, A, B, C, 12, 0xe8c7b756); OP(C, D, A, B, 17, 0x242070db); @@ -434,6 +590,7 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) OP(D, A, B, C, 12, 0xfd987193); OP(C, D, A, B, 17, 0xa679438e); OP(B, C, D, A, 22, 0x49b40821); +#endif /* For the second to fourth round we have the possibly swapped words in CORRECT_WORDS. Redefine the macro to take an additional first @@ -449,6 +606,15 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) while (0) /* Round 2. */ +#if MD5SUM_SIZE_VS_SPEED == 1 + pp = P_array; + for ( i=0 ; i < 4 ; i++ ) { + OP(FG, A, B, C, D, (int)(*pp++), 5, *pc++); + OP(FG, D, A, B, C, (int)(*pp++), 9, *pc++); + OP(FG, C, D, A, B, (int)(*pp++), 14, *pc++); + OP(FG, B, C, D, A, (int)(*pp++), 20, *pc++); + } +#else OP(FG, A, B, C, D, 1, 5, 0xf61e2562); OP(FG, D, A, B, C, 6, 9, 0xc040b340); OP(FG, C, D, A, B, 11, 14, 0x265e5a51); @@ -465,8 +631,17 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8); OP(FG, C, D, A, B, 7, 14, 0x676f02d9); OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a); +#endif /* Round 3. */ +#if MD5SUM_SIZE_VS_SPEED == 1 + for ( i=0 ; i < 4 ; i++ ) { + OP(FH, A, B, C, D, (int)(*pp++), 4, *pc++); + OP(FH, D, A, B, C, (int)(*pp++), 11, *pc++); + OP(FH, C, D, A, B, (int)(*pp++), 16, *pc++); + OP(FH, B, C, D, A, (int)(*pp++), 23, *pc++); + } +#else OP(FH, A, B, C, D, 5, 4, 0xfffa3942); OP(FH, D, A, B, C, 8, 11, 0x8771f681); OP(FH, C, D, A, B, 11, 16, 0x6d9d6122); @@ -483,8 +658,17 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) OP(FH, D, A, B, C, 12, 11, 0xe6db99e5); OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8); OP(FH, B, C, D, A, 2, 23, 0xc4ac5665); +#endif /* Round 4. */ +#if MD5SUM_SIZE_VS_SPEED == 1 + for ( i=0 ; i < 4 ; i++ ) { + OP(FI, A, B, C, D, (int)(*pp++), 6, *pc++); + OP(FI, D, A, B, C, (int)(*pp++), 10, *pc++); + OP(FI, C, D, A, B, (int)(*pp++), 15, *pc++); + OP(FI, B, C, D, A, (int)(*pp++), 21, *pc++); + } +#else OP(FI, A, B, C, D, 0, 6, 0xf4292244); OP(FI, D, A, B, C, 7, 10, 0x432aff97); OP(FI, C, D, A, B, 14, 15, 0xab9423a7); @@ -501,6 +685,8 @@ void md5_process_block(const void *buffer, size_t len, struct md5_ctx *ctx) OP(FI, D, A, B, C, 11, 10, 0xbd3af235); OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb); OP(FI, B, C, D, A, 9, 21, 0xeb86d391); +#endif +#endif /* Add the starting values of the context. */ A += A_save; -- cgit v1.1