Diffstat (limited to 'networking/tls_pstm_montgomery_reduce.c')
-rw-r--r-- | networking/tls_pstm_montgomery_reduce.c | 423
1 file changed, 423 insertions, 0 deletions
diff --git a/networking/tls_pstm_montgomery_reduce.c b/networking/tls_pstm_montgomery_reduce.c
new file mode 100644
index 0000000..c231c4d
--- /dev/null
+++ b/networking/tls_pstm_montgomery_reduce.c
@@ -0,0 +1,423 @@
+/*
+ * Copyright (C) 2017 Denys Vlasenko
+ *
+ * Licensed under GPLv2, see file LICENSE in this source tree.
+ */
+#include "tls.h"
+
+/**
+ * @file    pstm_montgomery_reduce.c
+ * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
+ *
+ * Multiprecision Montgomery Reduction.
+ */
+/*
+ * Copyright (c) 2013-2015 INSIDE Secure Corporation
+ * Copyright (c) PeerSec Networks, 2002-2011
+ * All Rights Reserved
+ *
+ * The latest version of this code is available at http://www.matrixssl.org
+ *
+ * This software is open source; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This General Public License does NOT permit incorporating this software
+ * into proprietary programs. If you are unable to comply with the GPL, a
+ * commercial license for this software may be purchased from INSIDE at
+ * http://www.insidesecure.com/eng/Company/Locations
+ *
+ * This program is distributed WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+/******************************************************************************/
+
+///bbox
+//#include "../cryptoApi.h"
+#ifndef DISABLE_PSTM
+
+/******************************************************************************/
+
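+/* Every port below supplies the same primitives; their meaning is spelled
+ * out by the ISO C fallback at the end of the chain: INNERMUL computes
+ * _c[LO] += cy + mu * (*tmpm++) and leaves the high digit of the product
+ * in cy, and PROPCARRY computes _c[LO] += cy and leaves the carry-out
+ * in cy. */
+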
+#if defined(PSTM_X86)
+/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
+#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
+#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
+#endif
+//#pragma message ("Using 32 bit x86 Assembly Optimizations")
+
+#define MONT_START
+#define MONT_FINI
+#define LOOP_END
+#define LOOP_START \
+	mu = c[x] * mp
+
+#define INNERMUL \
+asm( \
+	"movl %5,%%eax \n\t" \
+	"mull %4 \n\t" \
+	"addl %1,%%eax \n\t" \
+	"adcl $0,%%edx \n\t" \
+	"addl %%eax,%0 \n\t" \
+	"adcl $0,%%edx \n\t" \
+	"movl %%edx,%1 \n\t" \
+:"=g"(_c[LO]), "=r"(cy) \
+:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
+: "%eax", "%edx", "%cc")
+
+#define PROPCARRY \
+asm( \
+	"addl %1,%0 \n\t" \
+	"setb %%al \n\t" \
+	"movzbl %%al,%1 \n\t" \
+:"=g"(_c[LO]), "=r"(cy) \
+:"0"(_c[LO]), "1"(cy) \
+: "%eax", "%cc")
+
+/******************************************************************************/
+#elif defined(PSTM_X86_64)
+/* x86-64 optimized */
+#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
+#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
+#endif
+//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
+
+#define MONT_START
+#define MONT_FINI
+#define LOOP_END
+#define LOOP_START \
+	mu = c[x] * mp
+
+#define INNERMUL \
+asm( \
+	"movq %5,%%rax \n\t" \
+	"mulq %4 \n\t" \
+	"addq %1,%%rax \n\t" \
+	"adcq $0,%%rdx \n\t" \
+	"addq %%rax,%0 \n\t" \
+	"adcq $0,%%rdx \n\t" \
+	"movq %%rdx,%1 \n\t" \
+	:"=g"(_c[LO]), "=r"(cy) \
+	:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
+	: "%rax", "%rdx", "cc")
+
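+/* INNERMUL8: INNERMUL unrolled eight times, consuming eight digits of the
+ * modulus per asm block; the reduction loop below uses it to walk tmpm and
+ * _c in steps of eight. */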
+#define INNERMUL8 \
+asm( \
+	"movq 0(%5),%%rax    \n\t" \
+	"movq 0(%2),%%r10    \n\t" \
+	"movq 0x8(%5),%%r11  \n\t" \
+	"mulq %4             \n\t" \
+	"addq %%r10,%%rax    \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq 0x8(%2),%%r10  \n\t" \
+	"addq %3,%%rax       \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq %%rax,0(%0)    \n\t" \
+	"movq %%rdx,%1       \n\t" \
+\
+	"movq %%r11,%%rax    \n\t" \
+	"movq 0x10(%5),%%r11 \n\t" \
+	"mulq %4             \n\t" \
+	"addq %%r10,%%rax    \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq 0x10(%2),%%r10 \n\t" \
+	"addq %3,%%rax       \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq %%rax,0x8(%0)  \n\t" \
+	"movq %%rdx,%1       \n\t" \
+\
+	"movq %%r11,%%rax    \n\t" \
+	"movq 0x18(%5),%%r11 \n\t" \
+	"mulq %4             \n\t" \
+	"addq %%r10,%%rax    \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq 0x18(%2),%%r10 \n\t" \
+	"addq %3,%%rax       \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq %%rax,0x10(%0) \n\t" \
+	"movq %%rdx,%1       \n\t" \
+\
+	"movq %%r11,%%rax    \n\t" \
+	"movq 0x20(%5),%%r11 \n\t" \
+	"mulq %4             \n\t" \
+	"addq %%r10,%%rax    \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq 0x20(%2),%%r10 \n\t" \
+	"addq %3,%%rax       \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq %%rax,0x18(%0) \n\t" \
+	"movq %%rdx,%1       \n\t" \
+\
+	"movq %%r11,%%rax    \n\t" \
+	"movq 0x28(%5),%%r11 \n\t" \
+	"mulq %4             \n\t" \
+	"addq %%r10,%%rax    \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq 0x28(%2),%%r10 \n\t" \
+	"addq %3,%%rax       \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq %%rax,0x20(%0) \n\t" \
+	"movq %%rdx,%1       \n\t" \
+\
+	"movq %%r11,%%rax    \n\t" \
+	"movq 0x30(%5),%%r11 \n\t" \
+	"mulq %4             \n\t" \
+	"addq %%r10,%%rax    \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq 0x30(%2),%%r10 \n\t" \
+	"addq %3,%%rax       \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq %%rax,0x28(%0) \n\t" \
+	"movq %%rdx,%1       \n\t" \
+\
+	"movq %%r11,%%rax    \n\t" \
+	"movq 0x38(%5),%%r11 \n\t" \
+	"mulq %4             \n\t" \
+	"addq %%r10,%%rax    \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq 0x38(%2),%%r10 \n\t" \
+	"addq %3,%%rax       \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq %%rax,0x30(%0) \n\t" \
+	"movq %%rdx,%1       \n\t" \
+\
+	"movq %%r11,%%rax    \n\t" \
+	"mulq %4             \n\t" \
+	"addq %%r10,%%rax    \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"addq %3,%%rax       \n\t" \
+	"adcq $0,%%rdx       \n\t" \
+	"movq %%rax,0x38(%0) \n\t" \
+	"movq %%rdx,%1       \n\t" \
+\
+	:"=r"(_c), "=r"(cy) \
+	: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
+	: "%rax", "%rdx", "%r10", "%r11", "cc")
+
+#define PROPCARRY \
+asm( \
+	"addq %1,%0 \n\t" \
+	"setb %%al \n\t" \
+	"movzbq %%al,%1 \n\t" \
+	:"=g"(_c[LO]), "=r"(cy) \
+	:"0"(_c[LO]), "1"(cy) \
+	: "%rax", "cc")
+
+/******************************************************************************/
+#elif defined(PSTM_ARM)
+
+#define MONT_START
+#define MONT_FINI
+#define LOOP_END
+#define LOOP_START \
+	mu = c[x] * mp
+
+#ifdef __thumb2__
+//#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations")
+#define INNERMUL \
+asm( \
+	" LDR    r0,%1       \n\t" \
+	" ADDS   r0,r0,%0    \n\t" \
+	" ITE    CS          \n\t" \
+	" MOVCS  %0,#1       \n\t" \
+	" MOVCC  %0,#0       \n\t" \
+	" UMLAL  r0,%0,%3,%4 \n\t" \
+	" STR    r0,%1       \n\t" \
+	:"=r"(cy),"=m"(_c[0])\
+	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
+	:"r0","%cc");
+#define PROPCARRY \
+asm( \
+	" LDR    r0,%1       \n\t" \
+	" ADDS   r0,r0,%0    \n\t" \
+	" STR    r0,%1       \n\t" \
+	" ITE    CS          \n\t" \
+	" MOVCS  %0,#1       \n\t" \
+	" MOVCC  %0,#0       \n\t" \
+	:"=r"(cy),"=m"(_c[0])\
+	:"0"(cy),"m"(_c[0])\
+	:"r0","%cc");
+#else /* Non-Thumb2 code */
+//#pragma message ("Using 32 bit ARM Assembly Optimizations")
+#define INNERMUL \
+asm( \
+	" LDR    r0,%1       \n\t" \
+	" ADDS   r0,r0,%0    \n\t" \
+	" MOVCS  %0,#1       \n\t" \
+	" MOVCC  %0,#0       \n\t" \
+	" UMLAL  r0,%0,%3,%4 \n\t" \
+	" STR    r0,%1       \n\t" \
+	:"=r"(cy),"=m"(_c[0])\
+	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
+	:"r0","%cc");
+#define PROPCARRY \
+asm( \
+	" LDR    r0,%1       \n\t" \
+	" ADDS   r0,r0,%0    \n\t" \
+	" STR    r0,%1       \n\t" \
+	" MOVCS  %0,#1       \n\t" \
+	" MOVCC  %0,#0       \n\t" \
+	:"=r"(cy),"=m"(_c[0])\
+	:"0"(cy),"m"(_c[0])\
+	:"r0","%cc");
+#endif /* __thumb2__ */
+
+
+/******************************************************************************/
+#elif defined(PSTM_MIPS)
+/* MIPS32 */
+//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
+#define MONT_START
+#define MONT_FINI
+#define LOOP_END
+#define LOOP_START \
+	mu = c[x] * mp
+
+#define INNERMUL \
+asm( \
+	" multu  %3,%4       \n\t" \
+	" mflo   $12         \n\t" \
+	" mfhi   $13         \n\t" \
+	" addu   $12,$12,%0  \n\t" \
+	" sltu   $10,$12,%0  \n\t" \
+	" addu   $13,$13,$10 \n\t" \
+	" lw     $10,%1      \n\t" \
+	" addu   $12,$12,$10 \n\t" \
+	" sltu   $10,$12,$10 \n\t" \
+	" addu   %0,$13,$10  \n\t" \
+	" sw     $12,%1      \n\t" \
+	:"=r"(cy),"=m"(_c[0])\
+	:"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\
+	:"$10","$12","$13")\
+; ++tmpm;
+
+#define PROPCARRY \
+asm( \
+	" lw     $10,%1      \n\t" \
+	" addu   $10,$10,%0  \n\t" \
+	" sw     $10,%1      \n\t" \
+	" sltu   %0,$10,%0   \n\t" \
+	:"=r"(cy),"=m"(_c[0])\
+	:"r"(cy),"r"(_c[0])\
+	:"$10");
+
+
+/******************************************************************************/
+#else
+
+/* ISO C code */
+#define MONT_START
+#define MONT_FINI
+#define LOOP_END
+#define LOOP_START \
+	mu = c[x] * mp
+
+#define INNERMUL \
+	do { pstm_word t; \
+		t = ((pstm_word)_c[0] + (pstm_word)cy) + \
+			(((pstm_word)mu) * ((pstm_word)*tmpm++)); \
+		_c[0] = (pstm_digit)t; \
+		cy = (pstm_digit)(t >> DIGIT_BIT); \
+	} while (0)
+
+#define PROPCARRY \
+	do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0)
+
+#endif
+
+/******************************************************************************/
+
+#define LO 0
+
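+/* Round x picks mu = c[x]*mp so that c[x] + mu*m == 0 (mod 2^DIGIT_BIT);
+ * adding mu*m at digit x therefore zeroes that digit without changing the
+ * value mod m.  After pa rounds the low pa digits are all zero, and
+ * dropping them divides by R = 2^(DIGIT_BIT*pa). */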
+/* computes x/R == x (mod N) via Montgomery Reduction */
+int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
+		pstm_digit mp, pstm_digit *paD, uint32 paDlen)
+{
+	pstm_digit *c, *_c, *tmpm, mu;
+	int32 oldused, x, y;
+	int16 pa;
+
+	pa = m->used;
+	if (pa > a->alloc) {
+		/* Sanity test for bad numbers. This will confirm no buffer overruns */
+		return PS_LIMIT_FAIL;
+	}
+
+	if (paD && paDlen >= (uint32)2*pa+1) {
+		c = paD;
+		memset(c, 0x0, paDlen);
+	} else {
+		c = xzalloc(2*pa+1);
+	}
+	/* copy the input */
+	oldused = a->used;
+	for (x = 0; x < oldused; x++) {
+		c[x] = a->dp[x];
+	}
+
+	MONT_START;
+
+	for (x = 0; x < pa; x++) {
+		pstm_digit cy = 0;
+		/* get Mu for this round */
+		LOOP_START;
+		_c = c + x;
+		tmpm = m->dp;
+		y = 0;
+#ifdef PSTM_X86_64
+		for (; y < (pa & ~7); y += 8) {
+			INNERMUL8;
+			_c += 8;
+			tmpm += 8;
+		}
+#endif /* PSTM_X86_64 */
+		for (; y < pa; y++) {
+			INNERMUL;
+			++_c;
+		}
+		LOOP_END;
+		while (cy) {
+			PROPCARRY;
+			++_c;
+		}
+	}
+
+	/* now copy out */
+	_c = c + pa;
+	tmpm = a->dp;
+	for (x = 0; x < pa+1; x++) {
+		*tmpm++ = *_c++;
+	}
+
+	for (; x < oldused; x++) {
+		*tmpm++ = 0;
+	}
+
+	MONT_FINI;
+
+	a->used = pa+1;
+	pstm_clamp(a);
+
+	/* reuse x as return code */
+	x = PSTM_OKAY;
+
+	/* if A >= m then A = A - m */
+	if (pstm_cmp_mag (a, m) != PSTM_LT) {
+		if (s_pstm_sub (a, m, a) != PSTM_OKAY) {
+			x = PS_MEM_FAIL;
+		}
+	}
+	if (paDlen < (uint32)2*pa+1) {
+		psFree(c, pool);
+	}
+	return x;
+}
+
+#endif /* !DISABLE_PSTM */
+/******************************************************************************/
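
For readers tracing the ISO C path, below is a minimal standalone sketch of the same round structure. It is illustrative only and not part of the commit: the 16-bit `digit` type, the helper `neg_inv`, `PA`, and the test values are all assumptions chosen so that every double-width product fits in a 32-bit `word`, mirroring the pstm_digit/pstm_word relationship.

/* Minimal sketch of one Montgomery reduction pass with 16-bit digits.
 * Build: cc -std=c99 -o demo demo.c && ./demo */
#include <stdint.h>
#include <stdio.h>

typedef uint16_t digit;            /* stand-in for pstm_digit */
typedef uint32_t word;             /* stand-in for pstm_word (double width) */
#define DIGIT_BIT 16

/* mp = -m0^(-1) mod 2^16, m0 odd: each Newton/Hensel step doubles the
 * number of correct low bits (3 -> 6 -> 12 -> 24 >= 16) */
static digit neg_inv(digit m0)
{
	uint32_t x = m0;               /* an odd m0 is its own inverse mod 8 */
	x *= 2 - m0 * x;
	x *= 2 - m0 * x;
	x *= 2 - m0 * x;
	return (digit)(0u - x);        /* negate to get -1/m0 */
}

int main(void)
{
	enum { PA = 2 };                            /* modulus length in digits */
	const digit m[PA] = { 0xFFFB, 0x0003 };     /* m = 0x3FFFB, odd */
	const digit mp = neg_inv(m[0]);
	/* c: number to reduce, little-endian digits, c < m*R with R = 2^32 */
	digit c[2*PA + 1] = { 0x1234, 0x5678, 0x9ABC, 0x0001, 0 };
	int x, y, i;

	for (x = 0; x < PA; x++) {
		digit cy = 0;
		digit mu = (digit)((word)c[x] * mp);    /* LOOP_START */
		for (y = 0; y < PA; y++) {              /* INNERMUL */
			word t = (word)c[x + y] + cy + (word)mu * m[y];
			c[x + y] = (digit)t;
			cy = (digit)(t >> DIGIT_BIT);
		}
		for (i = x + PA; cy; i++) {             /* PROPCARRY */
			word t = (word)c[i] + cy;
			c[i] = (digit)t;
			cy = (digit)(t >> DIGIT_BIT);
		}
	}

	/* low PA digits are now zero; result is c shifted down PA digits,
	 * minus m once more if it still reaches m */
	uint64_t M = 0x3FFFBull;
	uint64_t r = (uint64_t)c[2] | ((uint64_t)c[3] << 16) | ((uint64_t)c[4] << 32);
	if (r >= M)
		r -= M;                                 /* final conditional subtract */
	/* check: r * R must be congruent to the original value mod m */
	printf("low digits %04x %04x, r=%llu, check %llu == %llu\n",
		c[0], c[1], (unsigned long long)r,
		(unsigned long long)((r << 32) % M),
		(unsigned long long)(0x00019ABC56781234ull % M));
	return 0;
}

The 2*PA+1 digit buffer matches the sizing logic in the real function: the inner multiply writes at most 2*PA digits and the carry propagation can touch one more, which is also why pstm_montgomery_reduce sets a->used = pa+1 before clamping.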