diff options
Diffstat (limited to 'libbb')
-rw-r--r-- | libbb/hash_md5_sha_x86-32_shaNI.S | 37 | ||||
-rw-r--r-- | libbb/hash_md5_sha_x86-64_shaNI.S | 24 |
2 files changed, 29 insertions, 32 deletions
diff --git a/libbb/hash_md5_sha_x86-32_shaNI.S b/libbb/hash_md5_sha_x86-32_shaNI.S index 5d082eb..0f3fe57 100644 --- a/libbb/hash_md5_sha_x86-32_shaNI.S +++ b/libbb/hash_md5_sha_x86-32_shaNI.S @@ -32,14 +32,10 @@ #define MSG1 %xmm4 #define MSG2 %xmm5 #define MSG3 %xmm6 -#define SHUF_MASK %xmm7 - .balign 8 # allow decoders to fetch at least 3 first insns + .balign 8 # allow decoders to fetch at least 2 first insns sha1_process_block64_shaNI: - pushl %ebp - movl %esp, %ebp - subl $32, %esp - andl $~0xF, %esp # paddd needs aligned memory operand + subl $16, %esp /* load initial hash values */ xor128 E0, E0 @@ -47,30 +43,33 @@ sha1_process_block64_shaNI: pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD - mova128 PSHUFFLE_BYTE_FLIP_MASK, SHUF_MASK + mova128 PSHUFFLE_BYTE_FLIP_MASK, %xmm7 + + movu128 0*16(%eax), MSG0 + pshufb %xmm7, MSG0 + movu128 1*16(%eax), MSG1 + pshufb %xmm7, MSG1 + movu128 2*16(%eax), MSG2 + pshufb %xmm7, MSG2 + movu128 3*16(%eax), MSG3 + pshufb %xmm7, MSG3 /* Save hash values for addition after rounds */ - movu128 E0, 16(%esp) + movu128 E0, %xmm7 movu128 ABCD, (%esp) /* Rounds 0-3 */ - movu128 0*16(%eax), MSG0 - pshufb SHUF_MASK, MSG0 paddd MSG0, E0 mova128 ABCD, E1 sha1rnds4 $0, E0, ABCD /* Rounds 4-7 */ - movu128 1*16(%eax), MSG1 - pshufb SHUF_MASK, MSG1 sha1nexte MSG1, E1 mova128 ABCD, E0 sha1rnds4 $0, E1, ABCD sha1msg1 MSG1, MSG0 /* Rounds 8-11 */ - movu128 2*16(%eax), MSG2 - pshufb SHUF_MASK, MSG2 sha1nexte MSG2, E0 mova128 ABCD, E1 sha1rnds4 $0, E0, ABCD @@ -78,8 +77,6 @@ sha1_process_block64_shaNI: xor128 MSG2, MSG0 /* Rounds 12-15 */ - movu128 3*16(%eax), MSG3 - pshufb SHUF_MASK, MSG3 sha1nexte MSG3, E1 mova128 ABCD, E0 sha1msg2 MSG3, MSG0 @@ -210,16 +207,16 @@ sha1_process_block64_shaNI: sha1rnds4 $3, E1, ABCD /* Add current hash values with previously saved */ - sha1nexte 16(%esp), E0 - paddd (%esp), ABCD + sha1nexte %xmm7, E0 + movu128 (%esp), %xmm7 + paddd %xmm7, ABCD /* Write hash values back in the correct order */ shuf128_32 $0x1B, ABCD, ABCD movu128 ABCD, 76(%eax) extr128_32 $3, E0, 76+4*4(%eax) - movl %ebp, %esp - popl %ebp + addl $16, %esp ret .size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI diff --git a/libbb/hash_md5_sha_x86-64_shaNI.S b/libbb/hash_md5_sha_x86-64_shaNI.S index 8ddec87..fc2ca92 100644 --- a/libbb/hash_md5_sha_x86-64_shaNI.S +++ b/libbb/hash_md5_sha_x86-64_shaNI.S @@ -32,7 +32,6 @@ #define MSG1 %xmm4 #define MSG2 %xmm5 #define MSG3 %xmm6 -#define SHUF_MASK %xmm7 .balign 8 # allow decoders to fetch at least 2 first insns sha1_process_block64_shaNI: @@ -43,30 +42,33 @@ sha1_process_block64_shaNI: pinsrd $3, 80+4*4(%rdi), E0 # load to uppermost 32-bit word shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD - mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), %xmm7 + + movu128 0*16(%rdi), MSG0 + pshufb %xmm7, MSG0 + movu128 1*16(%rdi), MSG1 + pshufb %xmm7, MSG1 + movu128 2*16(%rdi), MSG2 + pshufb %xmm7, MSG2 + movu128 3*16(%rdi), MSG3 + pshufb %xmm7, MSG3 /* Save hash values for addition after rounds */ - mova128 E0, %xmm9 + mova128 E0, %xmm7 mova128 ABCD, %xmm8 /* Rounds 0-3 */ - movu128 0*16(%rdi), MSG0 - pshufb SHUF_MASK, MSG0 paddd MSG0, E0 mova128 ABCD, E1 sha1rnds4 $0, E0, ABCD /* Rounds 4-7 */ - movu128 1*16(%rdi), MSG1 - pshufb SHUF_MASK, MSG1 sha1nexte MSG1, E1 mova128 ABCD, E0 sha1rnds4 $0, E1, ABCD sha1msg1 MSG1, MSG0 /* Rounds 8-11 */ - movu128 2*16(%rdi), MSG2 - pshufb SHUF_MASK, MSG2 sha1nexte MSG2, E0 mova128 ABCD, E1 sha1rnds4 $0, E0, ABCD @@ -74,8 +76,6 @@ sha1_process_block64_shaNI: xor128 MSG2, MSG0 /* Rounds 12-15 */ - movu128 3*16(%rdi), MSG3 - pshufb SHUF_MASK, MSG3 sha1nexte MSG3, E1 mova128 ABCD, E0 sha1msg2 MSG3, MSG0 @@ -206,7 +206,7 @@ sha1_process_block64_shaNI: sha1rnds4 $3, E1, ABCD /* Add current hash values with previously saved */ - sha1nexte %xmm9, E0 + sha1nexte %xmm7, E0 paddd %xmm8, ABCD /* Write hash values back in the correct order */ |