summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Denys Vlasenko, 2018-12-08 13:34:43 +0100
committer: Denys Vlasenko, 2018-12-08 13:34:43 +0100
commit: 9f00a0fdb159432f1d7232253e2180d85e5eca32 (patch)
tree: 0b5e157a0bb052f71cc575c3ba93f14e88e91abd
parent: f522dd94207275ac4a2706c4927a12c37707ff5a (diff)
download: busybox-9f00a0fdb159432f1d7232253e2180d85e5eca32.zip
          busybox-9f00a0fdb159432f1d7232253e2180d85e5eca32.tar.gz
tls: make RIGHTSHIFTX() in AES-GCM faster
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- networking/tls_aesgcm.c 63
1 file changed, 56 insertions, 7 deletions
diff --git a/networking/tls_aesgcm.c b/networking/tls_aesgcm.c
index 688df85..008dc9b 100644
--- a/networking/tls_aesgcm.c
+++ b/networking/tls_aesgcm.c
@@ -35,17 +35,66 @@ static ALWAYS_INLINE void FlattenSzInBits(byte* buf, word32 sz)
static void RIGHTSHIFTX(byte* x)
{
- int i;
- int carryOut = 0;
- int carryIn = 0;
- int borrow = x[15] & 0x01;
+#define l ((unsigned long*)x)
+#if 0
+ // Generic byte-at-a-time algorithm
+ int i;
+ byte carryIn = (x[15] & 0x01) ? 0xE1 : 0;
for (i = 0; i < AES_BLOCK_SIZE; i++) {
- carryOut = x[i] & 0x01;
- x[i] = (x[i] >> 1) | (carryIn ? 0x80 : 0);
+ byte carryOut = (x[i] << 7); // zero, or 0x80
+ x[i] = (x[i] >> 1) ^ carryIn;
+ carryIn = carryOut;
+ }
+
+#elif BB_BIG_ENDIAN
+
+ // Big-endian can shift-right in larger than byte chunks
+ // (we use the fact that 'x' is long-aligned)
+ unsigned long carryIn = (x[15] & 0x01)
+ ? ((unsigned long)0xE1 << (LONG_BIT-8))
+ : 0;
+# if ULONG_MAX <= 0xffffffff
+ int i;
+ for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
+ unsigned long carryOut = l[i] << (LONG_BIT-1); // zero, or 0x800..00
+ l[i] = (l[i] >> 1) ^ carryIn;
+ carryIn = carryOut;
+ }
+# else
+ // 64-bit code: need to process only 2 words
+ unsigned long carryOut = l[0] << (LONG_BIT-1); // zero, or 0x800..00
+ l[0] = (l[0] >> 1) ^ carryIn;
+ l[1] = (l[1] >> 1) ^ carryOut;
+# endif
+
+#else /* LITTLE_ENDIAN */
+
+ // In order to use word-sized ops, little-endian needs to byteswap.
+ // On x86, code size increase is ~10 bytes compared to byte-by-byte.
+ unsigned long carryIn = (x[15] & 0x01)
+ ? ((unsigned long)0xE1 << (LONG_BIT-8))
+ : 0;
+# if ULONG_MAX <= 0xffffffff
+ int i;
+ for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
+ unsigned long ti = SWAP_BE32(l[i]);
+ unsigned long carryOut = ti << (LONG_BIT-1); // zero, or 0x800..00
+ ti = (ti >> 1) ^ carryIn;
+ l[i] = SWAP_BE32(ti);
carryIn = carryOut;
}
- if (borrow) x[0] ^= 0xE1;
+# else
+ // 64-bit code: need to process only 2 words
+ unsigned long tt = SWAP_BE64(l[0]);
+ unsigned long carryOut = tt << (LONG_BIT-1); // zero, or 0x800..00
+ tt = (tt >> 1) ^ carryIn; l[0] = SWAP_BE64(tt);
+ tt = SWAP_BE64(l[1]);
+ tt = (tt >> 1) ^ carryOut; l[1] = SWAP_BE64(tt);
+# endif
+
+#endif /* LITTLE_ENDIAN */
+#undef l
}
static void GMULT(byte* X, byte* Y)