author | Denys Vlasenko | 2021-10-06 17:17:34 +0200
committer | Denys Vlasenko | 2021-10-06 17:17:34 +0200
commit | 911344a99889319a7dba8a725a64dc324597f9eb (patch)
tree | 7273dd7914feb9b87a1b9e692a0dc432bb31ae96 /networking
parent | 22fd8fd3f4c271d6037753165131c7c35a039762 (diff)
tls: P256: x86-64 assembly
function old new delta
sp_256_mont_mul_8 127 155 +28
sp_256_proj_point_dbl_8 448 469 +21
sp_256_mont_sub_8 23 35 +12
sp_256_mont_dbl_8 26 38 +12
sp_256_sub_8 44 49 +5
sp_256_ecc_mulmod_8 1530 1535 +5
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 6/0 up/down: 83/0) Total: 83 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
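For reference, the new sp_256_add_8/sp_256_sub_8 fast paths added by this commit treat the eight 32-bit digits as four 64-bit words and run a single addq/adcq (or subq/sbbq) carry chain over them. Below is a minimal portable sketch of the same computation; it is not the committed code, and the helper name is made up here for illustration:

```c
#include <stdint.h>

/* r = a + b over four 64-bit words; returns the carry out of the top word.
 * The committed asm does the same with one addq plus three adcq, then
 * materializes the carry flag with "sbbq %3,%3", so it returns 0 or -1
 * rather than 0 or 1. */
static int add_256_sketch(uint64_t r[4], const uint64_t a[4], const uint64_t b[4])
{
	uint64_t c = 0;
	for (int i = 0; i < 4; i++) {
		uint64_t t = a[i] + c;
		c = (t < c);            /* carry from adding the previous carry */
		r[i] = t + b[i];
		c |= (r[i] < t);        /* carry from adding b[i] */
	}
	return (int)c;
}
```

sp_256_sub_8 is the mirror image: a subq/sbbq borrow chain over the same four words, returning the final borrow.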
Diffstat (limited to 'networking')
-rw-r--r-- | networking/tls_sp_c32.c | 99
1 file changed, 99 insertions, 0 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 5320477..14a7c70 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -195,6 +195,34 @@ static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 		: "memory"
 	);
 	return reg;
+#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
+	/* x86_64 has no alignment restrictions, and is little-endian,
+	 * so 64-bit and 32-bit representations are identical */
+	uint64_t reg;
+	asm volatile (
+"\n		movq	(%0), %3"
+"\n		addq	(%1), %3"
+"\n		movq	%3, (%2)"
+"\n"
+"\n		movq	1*8(%0), %3"
+"\n		adcq	1*8(%1), %3"
+"\n		movq	%3, 1*8(%2)"
+"\n"
+"\n		movq	2*8(%0), %3"
+"\n		adcq	2*8(%1), %3"
+"\n		movq	%3, 2*8(%2)"
+"\n"
+"\n		movq	3*8(%0), %3"
+"\n		adcq	3*8(%1), %3"
+"\n		movq	%3, 3*8(%2)"
+"\n"
+"\n		sbbq	%3, %3"
+"\n"
+		: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
+		: "0" (a), "1" (b), "2" (r)
+		: "memory"
+	);
+	return reg;
 #else
 	int i;
 	sp_digit carry;
@@ -265,6 +293,34 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 		: "memory"
 	);
 	return reg;
+#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
+	/* x86_64 has no alignment restrictions, and is little-endian,
+	 * so 64-bit and 32-bit representations are identical */
+	uint64_t reg;
+	asm volatile (
+"\n		movq	(%0), %3"
+"\n		subq	(%1), %3"
+"\n		movq	%3, (%2)"
+"\n"
+"\n		movq	1*8(%0), %3"
+"\n		sbbq	1*8(%1), %3"
+"\n		movq	%3, 1*8(%2)"
+"\n"
+"\n		movq	2*8(%0), %3"
+"\n		sbbq	2*8(%1), %3"
+"\n		movq	%3, 2*8(%2)"
+"\n"
+"\n		movq	3*8(%0), %3"
+"\n		sbbq	3*8(%1), %3"
+"\n		movq	%3, 3*8(%2)"
+"\n"
+"\n		sbbq	%3, %3"
+"\n"
+		: "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
+		: "0" (a), "1" (b), "2" (r)
+		: "memory"
+	);
+	return reg;
 #else
 	int i;
 	sp_digit borrow;
@@ -380,6 +436,49 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	}
 	r[15] = accl;
 	memcpy(r, rr, sizeof(rr));
+#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
+	/* x86_64 has no alignment restrictions, and is little-endian,
+	 * so 64-bit and 32-bit representations are identical */
+	const uint64_t* aa = (const void*)a;
+	const uint64_t* bb = (const void*)b;
+	uint64_t rr[8];
+	int k;
+	uint64_t accl;
+	uint64_t acch;
+
+	acch = accl = 0;
+	for (k = 0; k < 7; k++) {
+		int i, j;
+		uint64_t acc_hi;
+		i = k - 3;
+		if (i < 0)
+			i = 0;
+		j = k - i;
+		acc_hi = 0;
+		do {
+////////////////////////
+//			uint128_t m = ((uint128_t)a[i]) * b[j];
+//			acc_hi:acch:accl += m;
+			asm volatile (
+			// aa[i] is already loaded in %%rax
+"\n			mulq	%7"
+"\n			addq	%%rax, %0"
+"\n			adcq	%%rdx, %1"
+"\n			adcq	$0, %2"
+			: "=rm" (accl), "=rm" (acch), "=rm" (acc_hi)
+			: "0" (accl), "1" (acch), "2" (acc_hi), "a" (aa[i]), "m" (bb[j])
+			: "cc", "dx"
+			);
+////////////////////////
+			j--;
+			i++;
+		} while (i != 4 && i <= k);
+		rr[k] = accl;
+		accl = acch;
+		acch = acc_hi;
+	}
+	rr[7] = accl;
+	memcpy(r, rr, sizeof(rr));
 #elif 0 //TODO: arm assembly (untested)
 	sp_digit tmp[16];
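The sp_256_mul_8 hunk implements a column-wise (comba-style) 4x64 -> 8x64 multiply: for each output word k it accumulates every 128-bit product a[i]*b[j] with i + j == k into a 192-bit accumulator (accl:acch:acc_hi in the asm), emits the low word, and shifts the accumulator down by 64 bits. A rough portable equivalent using GCC's unsigned __int128 (my own sketch, not the committed code) is:

```c
#include <stdint.h>
#include <string.h>

typedef unsigned __int128 uint128_t;  /* GCC/clang extension, same alias the file uses */

/* r[0..7] = a[0..3] * b[0..3]; rr buffers the result so r may alias a or b */
static void mul_256_sketch(uint64_t r[8], const uint64_t a[4], const uint64_t b[4])
{
	uint64_t rr[8];
	uint128_t acc = 0;    /* low 128 bits of the column accumulator (accl:acch) */
	uint64_t acc_hi = 0;  /* top 64 bits of the column accumulator */

	for (int k = 0; k < 7; k++) {
		for (int i = (k <= 3) ? 0 : k - 3; i <= k && i <= 3; i++) {
			/* the asm block: acc_hi:acch:accl += (uint128_t)a[i] * b[k-i]
			 * via mulq / addq / adcq / adcq $0 */
			uint128_t old = acc;
			acc += (uint128_t)a[i] * b[k - i];
			acc_hi += (acc < old);  /* carry out of the low 128 bits */
		}
		rr[k] = (uint64_t)acc;                          /* emit this column's word */
		acc = (acc >> 64) | ((uint128_t)acc_hi << 64);  /* shift accumulator down */
		acc_hi = 0;
	}
	rr[7] = (uint64_t)acc;  /* top word; no further carry is possible */
	memcpy(r, rr, sizeof(rr));
}
```

In the committed asm, each inner step is a single mulq (with aa[i] kept in %rax and bb[j] taken as a memory operand) followed by a three-word add that propagates the carries into acc_hi.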