summary | refs | log | tree | commit | diff
path: root/libbb/hash_md5_sha_x86-64.S.sh
diff options
context:
space:
mode:
author: Denys Vlasenko, 2022-02-07 02:06:18 +0100
committer: Denys Vlasenko, 2022-02-07 02:34:04 +0100
commit: c193cbd6dfd095c6b8346bab1ea6ba7106b3e5bb (patch)
tree: b75c366622b3146a4fdd3f7739b6eaf9d3bc1ac9 /libbb/hash_md5_sha_x86-64.S.sh
parent: 987be932ed3cbea56b68bbe85649191c13b66015 (diff)
downloadbusybox-c193cbd6dfd095c6b8346bab1ea6ba7106b3e5bb.zip
busybox-c193cbd6dfd095c6b8346bab1ea6ba7106b3e5bb.tar.gz
libbb/sha1: shrink and speed up unrolled x86-64 code
function                                             old     new   delta
sha1_process_block64                                3514    3482     -32

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb/hash_md5_sha_x86-64.S.sh')
-rwxr-xr-x libbb/hash_md5_sha_x86-64.S.sh | 9
1 files changed, 7 insertions, 2 deletions
diff --git a/libbb/hash_md5_sha_x86-64.S.sh b/libbb/hash_md5_sha_x86-64.S.sh
index 656fb54..fb1e4b5 100755
--- a/libbb/hash_md5_sha_x86-64.S.sh
+++ b/libbb/hash_md5_sha_x86-64.S.sh
@@ -203,8 +203,13 @@ echo "# PREP $@
movaps $xmmW12, $xmmT1
psrldq \$4, $xmmT1 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
- pshufd \$0x4e, $xmmW0, $xmmT2 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
- punpcklqdq $xmmW4, $xmmT2 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
+# pshufd \$0x4e, $xmmW0, $xmmT2 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
+# punpcklqdq $xmmW4, $xmmT2 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
+# same result as above, but shorter and faster:
+# pshufd/shufps are subtly different: pshufd takes all four result dwords from its source operand,
+# shufps takes result dwords 0,1 from its *2nd* (destination) operand, and result dwords 2,3 from its 1st (source) one!
+ movaps $xmmW0, $xmmT2
+ shufps \$0x4e, $xmmW4, $xmmT2 # 01001110=(T2.dw[2], T2.dw[3], W4.dw[0], W4.dw[1]) = ([2],[3],[4],[5])
xorps $xmmW8, $xmmW0 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps $xmmT1, $xmmT2 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])