diff options
author | Denys Vlasenko | 2017-01-19 16:45:41 +0100 |
---|---|---|
committer | Denys Vlasenko | 2017-01-19 16:45:41 +0100 |
commit | f7806f9d8fc889f1d6cd365b69d9d99a4a5a6e26 (patch) | |
tree | f7dca43d7506da675080d3a3b26dcde3835ecdeb | |
parent | 432f1ae2ff184e07fa78bd3797073094069e521d (diff) | |
download | busybox-f7806f9d8fc889f1d6cd365b69d9d99a4a5a6e26.zip busybox-f7806f9d8fc889f1d6cd365b69d9d99a4a5a6e26.tar.gz |
tls: fix ROL/ROR x86 optimization
ALWAYS_INLINE:
function old new delta
psAesInitKey 825 824 -1
ROR 5 - -5
setup_mix2 148 134 -14
psAesDecryptBlock 1184 1139 -45
psAesEncryptBlock 1193 1102 -91
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-156) Total: -156 bytes
ALWAYS_INLINE + __builtin_constant_p(shift_cnt):
function old new delta
ROR 5 - -5
psAesInitKey 825 818 -7
setup_mix2 148 123 -25
psAesDecryptBlock 1184 1078 -106
psAesEncryptBlock 1193 1017 -176
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-319) Total: -319 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | networking/tls_symmetric.h | 19 |
1 files changed, 14 insertions, 5 deletions
```diff
diff --git a/networking/tls_symmetric.h b/networking/tls_symmetric.h
index b6b55c7..8488b43 100644
--- a/networking/tls_symmetric.h
+++ b/networking/tls_symmetric.h
@@ -7,9 +7,6 @@
 
 /* The part below is a section of matrixssl-3-7-2b-open/crypto/cryptolib.h
  * Changes are flagged with //bbox
- * TODO:
- * Take a look at "roll %%cl" part... rotates by constant use fewer registers,
- * and on many Intel CPUs rotates by %cl are slower: they take 2 cycles, not 1.
  */
 
 /******************************************************************************/
@@ -28,16 +25,28 @@
 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && \
 		!defined(INTEL_CC) && !defined(PS_NO_ASM)
 
-static inline unsigned ROL(unsigned word, int i)
+static ALWAYS_INLINE unsigned ROL(unsigned word, int i)
 {
+	if (__builtin_constant_p(i)) //box
+		// Rotates by constant use fewer registers,
+		// and on many Intel CPUs rotates by %cl take 2 cycles, not 1.
+		asm ("roll %2,%0" //box
+			:"=r" (word)
+			:"0" (word),"i" (i));
+	else //box
 	asm ("roll %%cl,%0"
 		:"=r" (word)
 		:"0" (word),"c" (i));
 	return word;
 }
 
-static inline unsigned ROR(unsigned word, int i)
+static ALWAYS_INLINE unsigned ROR(unsigned word, int i)
 {
+	if (__builtin_constant_p(i)) //box
+		asm ("rorl %2,%0" //box
+			:"=r" (word)
+			:"0" (word),"i" (i));
+	else //box
 	asm ("rorl %%cl,%0"
 		:"=r" (word)
 		:"0" (word),"c" (i));
```