From de6cb4bed82356db72af81890c7c26d7e85fb50d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Thu, 3 Feb 2022 15:11:23 +0100
Subject: libbb/sha256: code shrink in 32-bit x86

function                                             old     new   delta
sha256_process_block64_shaNI                         747     722     -25

Signed-off-by: Denys Vlasenko
---
 libbb/hash_md5_sha256_x86-32_shaNI.S | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'libbb')

diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S
index 56e37fa..632dab7 100644
--- a/libbb/hash_md5_sha256_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-32_shaNI.S
@@ -49,8 +49,7 @@ sha256_process_block64_shaNI:
 	palignr $8, STATE1, STATE0 /* ABEF */
 	pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */
 
-#	mova128 PSHUFFLE_BSWAP32_FLIP_MASK, SHUF_MASK
-	lea K256, SHA256CONSTANTS
+	movl $K256+8*16, SHA256CONSTANTS
 
 	/* Save hash values for addition after rounds */
 	mova128 STATE0, 0*16(%esp)
@@ -60,7 +59,7 @@ sha256_process_block64_shaNI:
 	movu128 0*16(DATA_PTR), MSG
 	pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG
 	mova128 MSG, MSGTMP0
-	paddd 0*16(SHA256CONSTANTS), MSG
+	paddd 0*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	shuf128_32 $0x0E, MSG, MSG
 	sha256rnds2 STATE1, STATE0
@@ -69,7 +68,7 @@ sha256_process_block64_shaNI:
 	movu128 1*16(DATA_PTR), MSG
 	pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG
 	mova128 MSG, MSGTMP1
-	paddd 1*16(SHA256CONSTANTS), MSG
+	paddd 1*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	shuf128_32 $0x0E, MSG, MSG
 	sha256rnds2 STATE1, STATE0
@@ -79,7 +78,7 @@ sha256_process_block64_shaNI:
 	movu128 2*16(DATA_PTR), MSG
 	pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG
 	mova128 MSG, MSGTMP2
-	paddd 2*16(SHA256CONSTANTS), MSG
+	paddd 2*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	shuf128_32 $0x0E, MSG, MSG
 	sha256rnds2 STATE1, STATE0
@@ -89,7 +88,7 @@ sha256_process_block64_shaNI:
 	movu128 3*16(DATA_PTR), MSG
 	pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG
 	mova128 MSG, MSGTMP3
-	paddd 3*16(SHA256CONSTANTS), MSG
+	paddd 3*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP3, MSGTMP4
 	palignr $4, MSGTMP2, MSGTMP4
@@ -101,7 +100,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 16-19 */
 	mova128 MSGTMP0, MSG
-	paddd 4*16(SHA256CONSTANTS), MSG
+	paddd 4*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP0, MSGTMP4
 	palignr $4, MSGTMP3, MSGTMP4
@@ -113,7 +112,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 20-23 */
 	mova128 MSGTMP1, MSG
-	paddd 5*16(SHA256CONSTANTS), MSG
+	paddd 5*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP1, MSGTMP4
 	palignr $4, MSGTMP0, MSGTMP4
@@ -125,7 +124,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 24-27 */
 	mova128 MSGTMP2, MSG
-	paddd 6*16(SHA256CONSTANTS), MSG
+	paddd 6*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP2, MSGTMP4
 	palignr $4, MSGTMP1, MSGTMP4
@@ -137,7 +136,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 28-31 */
 	mova128 MSGTMP3, MSG
-	paddd 7*16(SHA256CONSTANTS), MSG
+	paddd 7*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP3, MSGTMP4
 	palignr $4, MSGTMP2, MSGTMP4
@@ -149,7 +148,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 32-35 */
 	mova128 MSGTMP0, MSG
-	paddd 8*16(SHA256CONSTANTS), MSG
+	paddd 8*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP0, MSGTMP4
 	palignr $4, MSGTMP3, MSGTMP4
@@ -161,7 +160,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 36-39 */
 	mova128 MSGTMP1, MSG
-	paddd 9*16(SHA256CONSTANTS), MSG
+	paddd 9*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP1, MSGTMP4
 	palignr $4, MSGTMP0, MSGTMP4
@@ -173,7 +172,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 40-43 */
 	mova128 MSGTMP2, MSG
-	paddd 10*16(SHA256CONSTANTS), MSG
+	paddd 10*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP2, MSGTMP4
 	palignr $4, MSGTMP1, MSGTMP4
@@ -185,7 +184,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 44-47 */
 	mova128 MSGTMP3, MSG
-	paddd 11*16(SHA256CONSTANTS), MSG
+	paddd 11*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP3, MSGTMP4
 	palignr $4, MSGTMP2, MSGTMP4
@@ -197,7 +196,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 48-51 */
 	mova128 MSGTMP0, MSG
-	paddd 12*16(SHA256CONSTANTS), MSG
+	paddd 12*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP0, MSGTMP4
 	palignr $4, MSGTMP3, MSGTMP4
@@ -209,7 +208,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 52-55 */
 	mova128 MSGTMP1, MSG
-	paddd 13*16(SHA256CONSTANTS), MSG
+	paddd 13*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP1, MSGTMP4
 	palignr $4, MSGTMP0, MSGTMP4
@@ -220,7 +219,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 56-59 */
 	mova128 MSGTMP2, MSG
-	paddd 14*16(SHA256CONSTANTS), MSG
+	paddd 14*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	mova128 MSGTMP2, MSGTMP4
 	palignr $4, MSGTMP1, MSGTMP4
@@ -231,7 +230,7 @@ sha256_process_block64_shaNI:
 
 	/* Rounds 60-63 */
 	mova128 MSGTMP3, MSG
-	paddd 15*16(SHA256CONSTANTS), MSG
+	paddd 15*16-8*16(SHA256CONSTANTS), MSG
 	sha256rnds2 STATE0, STATE1
 	shuf128_32 $0x0E, MSG, MSG
 	sha256rnds2 STATE1, STATE0
-- 
cgit v1.1