summary | refs | log | tree | commit | diff
path: root/libbb
diff options
context:
space:
mode:
author Denys Vlasenko 2022-02-06 00:55:52 +0100
committer Denys Vlasenko 2022-02-06 00:56:13 +0100
commit 4f40735c87f8292a87c066b3b7099b0be007cf59 (patch)
tree 26cd4b1adea86e0b7301a2899a8a4b38937541f1 /libbb
parent 31c1c310772fa6c897ee1585ea15fc38f3ab3dff (diff)
download busybox-4f40735c87f8292a87c066b3b7099b0be007cf59.zip
busybox-4f40735c87f8292a87c066b3b7099b0be007cf59.tar.gz
libbb/sha256: code shrink in 32-bit x86
function                                             old     new   delta
sha256_process_block64_shaNI                         722     713      -9

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- libbb/hash_md5_sha256_x86-32_shaNI.S 93
1 files changed, 48 insertions, 45 deletions
diff --git a/libbb/hash_md5_sha256_x86-32_shaNI.S b/libbb/hash_md5_sha256_x86-32_shaNI.S
index 632dab7..417da37 100644
--- a/libbb/hash_md5_sha256_x86-32_shaNI.S
+++ b/libbb/hash_md5_sha256_x86-32_shaNI.S
@@ -31,7 +31,7 @@
#define MSGTMP1 %xmm4
#define MSGTMP2 %xmm5
#define MSGTMP3 %xmm6
-#define MSGTMP4 %xmm7
+#define XMMTMP4 %xmm7
.balign 8 # allow decoders to fetch at least 3 first insns
sha256_process_block64_shaNI:
@@ -45,10 +45,12 @@ sha256_process_block64_shaNI:
shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
- mova128 STATE0, MSGTMP4
+ mova128 STATE0, XMMTMP4
palignr $8, STATE1, STATE0 /* ABEF */
- pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */
+ pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */
+/* XMMTMP4 holds flip mask from here... */
+ mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP4
movl $K256+8*16, SHA256CONSTANTS
/* Save hash values for addition after rounds */
@@ -57,7 +59,7 @@ sha256_process_block64_shaNI:
/* Rounds 0-3 */
movu128 0*16(DATA_PTR), MSG
- pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG
+ pshufb XMMTMP4, MSG
mova128 MSG, MSGTMP0
paddd 0*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
@@ -66,7 +68,7 @@ sha256_process_block64_shaNI:
/* Rounds 4-7 */
movu128 1*16(DATA_PTR), MSG
- pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG
+ pshufb XMMTMP4, MSG
mova128 MSG, MSGTMP1
paddd 1*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
@@ -76,7 +78,7 @@ sha256_process_block64_shaNI:
/* Rounds 8-11 */
movu128 2*16(DATA_PTR), MSG
- pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG
+ pshufb XMMTMP4, MSG
mova128 MSG, MSGTMP2
paddd 2*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
@@ -86,13 +88,14 @@ sha256_process_block64_shaNI:
/* Rounds 12-15 */
movu128 3*16(DATA_PTR), MSG
- pshufb PSHUFFLE_BSWAP32_FLIP_MASK, MSG
+ pshufb XMMTMP4, MSG
+/* ...to here */
mova128 MSG, MSGTMP3
paddd 3*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP3, MSGTMP4
- palignr $4, MSGTMP2, MSGTMP4
- paddd MSGTMP4, MSGTMP0
+ mova128 MSGTMP3, XMMTMP4
+ palignr $4, MSGTMP2, XMMTMP4
+ paddd XMMTMP4, MSGTMP0
sha256msg2 MSGTMP3, MSGTMP0
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -102,9 +105,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP0, MSG
paddd 4*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP0, MSGTMP4
- palignr $4, MSGTMP3, MSGTMP4
- paddd MSGTMP4, MSGTMP1
+ mova128 MSGTMP0, XMMTMP4
+ palignr $4, MSGTMP3, XMMTMP4
+ paddd XMMTMP4, MSGTMP1
sha256msg2 MSGTMP0, MSGTMP1
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -114,9 +117,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP1, MSG
paddd 5*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP1, MSGTMP4
- palignr $4, MSGTMP0, MSGTMP4
- paddd MSGTMP4, MSGTMP2
+ mova128 MSGTMP1, XMMTMP4
+ palignr $4, MSGTMP0, XMMTMP4
+ paddd XMMTMP4, MSGTMP2
sha256msg2 MSGTMP1, MSGTMP2
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -126,9 +129,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP2, MSG
paddd 6*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP2, MSGTMP4
- palignr $4, MSGTMP1, MSGTMP4
- paddd MSGTMP4, MSGTMP3
+ mova128 MSGTMP2, XMMTMP4
+ palignr $4, MSGTMP1, XMMTMP4
+ paddd XMMTMP4, MSGTMP3
sha256msg2 MSGTMP2, MSGTMP3
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -138,9 +141,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP3, MSG
paddd 7*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP3, MSGTMP4
- palignr $4, MSGTMP2, MSGTMP4
- paddd MSGTMP4, MSGTMP0
+ mova128 MSGTMP3, XMMTMP4
+ palignr $4, MSGTMP2, XMMTMP4
+ paddd XMMTMP4, MSGTMP0
sha256msg2 MSGTMP3, MSGTMP0
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -150,9 +153,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP0, MSG
paddd 8*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP0, MSGTMP4
- palignr $4, MSGTMP3, MSGTMP4
- paddd MSGTMP4, MSGTMP1
+ mova128 MSGTMP0, XMMTMP4
+ palignr $4, MSGTMP3, XMMTMP4
+ paddd XMMTMP4, MSGTMP1
sha256msg2 MSGTMP0, MSGTMP1
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -162,9 +165,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP1, MSG
paddd 9*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP1, MSGTMP4
- palignr $4, MSGTMP0, MSGTMP4
- paddd MSGTMP4, MSGTMP2
+ mova128 MSGTMP1, XMMTMP4
+ palignr $4, MSGTMP0, XMMTMP4
+ paddd XMMTMP4, MSGTMP2
sha256msg2 MSGTMP1, MSGTMP2
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -174,9 +177,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP2, MSG
paddd 10*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP2, MSGTMP4
- palignr $4, MSGTMP1, MSGTMP4
- paddd MSGTMP4, MSGTMP3
+ mova128 MSGTMP2, XMMTMP4
+ palignr $4, MSGTMP1, XMMTMP4
+ paddd XMMTMP4, MSGTMP3
sha256msg2 MSGTMP2, MSGTMP3
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -186,9 +189,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP3, MSG
paddd 11*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP3, MSGTMP4
- palignr $4, MSGTMP2, MSGTMP4
- paddd MSGTMP4, MSGTMP0
+ mova128 MSGTMP3, XMMTMP4
+ palignr $4, MSGTMP2, XMMTMP4
+ paddd XMMTMP4, MSGTMP0
sha256msg2 MSGTMP3, MSGTMP0
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -198,9 +201,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP0, MSG
paddd 12*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP0, MSGTMP4
- palignr $4, MSGTMP3, MSGTMP4
- paddd MSGTMP4, MSGTMP1
+ mova128 MSGTMP0, XMMTMP4
+ palignr $4, MSGTMP3, XMMTMP4
+ paddd XMMTMP4, MSGTMP1
sha256msg2 MSGTMP0, MSGTMP1
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -210,9 +213,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP1, MSG
paddd 13*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP1, MSGTMP4
- palignr $4, MSGTMP0, MSGTMP4
- paddd MSGTMP4, MSGTMP2
+ mova128 MSGTMP1, XMMTMP4
+ palignr $4, MSGTMP0, XMMTMP4
+ paddd XMMTMP4, MSGTMP2
sha256msg2 MSGTMP1, MSGTMP2
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -221,9 +224,9 @@ sha256_process_block64_shaNI:
mova128 MSGTMP2, MSG
paddd 14*16-8*16(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
- mova128 MSGTMP2, MSGTMP4
- palignr $4, MSGTMP1, MSGTMP4
- paddd MSGTMP4, MSGTMP3
+ mova128 MSGTMP2, XMMTMP4
+ palignr $4, MSGTMP1, XMMTMP4
+ paddd XMMTMP4, MSGTMP3
sha256msg2 MSGTMP2, MSGTMP3
shuf128_32 $0x0E, MSG, MSG
sha256rnds2 STATE1, STATE0
@@ -242,9 +245,9 @@ sha256_process_block64_shaNI:
/* Write hash values back in the correct order */
shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
- mova128 STATE0, MSGTMP4
+ mova128 STATE0, XMMTMP4
pblendw $0xF0, STATE1, STATE0 /* DCBA */
- palignr $8, MSGTMP4, STATE1 /* HGFE */
+ palignr $8, XMMTMP4, STATE1 /* HGFE */
movu128 STATE0, 76+0*16(%eax)
movu128 STATE1, 76+1*16(%eax)