Line data Source code
1 : #include "tommath_private.h" 2 : #ifdef BN_S_MP_SQR_FAST_C 3 : /* LibTomMath, multiple-precision integer library -- Tom St Denis */ 4 : /* SPDX-License-Identifier: Unlicense */ 5 : 6 : /* the jist of squaring... 7 : * you do like mult except the offset of the tmpx [one that 8 : * starts closer to zero] can't equal the offset of tmpy. 9 : * So basically you set up iy like before then you min it with 10 : * (ty-tx) so that it never happens. You double all those 11 : * you add in the inner loop 12 : 13 : After that loop you do the squares and add them in. 14 : */ 15 : 16 698698 : mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) 17 : { 18 33132 : int olduse, pa, ix, iz; 19 33132 : mp_digit W[MP_WARRAY], *tmpx; 20 33132 : mp_word W1; 21 33132 : mp_err err; 22 : 23 : /* grow the destination as required */ 24 698698 : pa = a->used + a->used; 25 698698 : if (b->alloc < pa) { 26 1661 : if ((err = mp_grow(b, pa)) != MP_OKAY) { 27 0 : return err; 28 : } 29 : } 30 : 31 : /* number of output digits to produce */ 32 665566 : W1 = 0; 33 41828442 : for (ix = 0; ix < pa; ix++) { 34 2339572 : int tx, ty, iy; 35 2339572 : mp_word _W; 36 2339572 : mp_digit *tmpy; 37 : 38 : /* clear counter */ 39 41129744 : _W = 0; 40 : 41 : /* get offsets into the two bignums */ 42 41129744 : ty = MP_MIN(a->used-1, ix); 43 41129744 : tx = ix - ty; 44 : 45 : /* setup temp aliases */ 46 41129744 : tmpx = a->dp + tx; 47 41129744 : tmpy = a->dp + ty; 48 : 49 : /* this is the number of times the loop will iterrate, essentially 50 : while (tx++ < a->used && ty-- >= 0) { ... } 51 : */ 52 41129744 : iy = MP_MIN(a->used-tx, ty+1); 53 : 54 : /* now for squaring tx can never equal ty 55 : * we halve the distance since they approach at a rate of 2x 56 : * and we have to round because odd cases need to be executed 57 : */ 58 41129744 : iy = MP_MIN(iy, ((ty-tx)+1)>>1); 59 : 60 : /* execute loop */ 61 365442620 : for (iz = 0; iz < iy; iz++) { 62 324312876 : _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; 63 : } 64 : 65 : /* double the inner product and add carry */ 66 41129744 : _W = _W + _W + W1; 67 : 68 : /* even columns have the square term in them */ 69 41129744 : if (((unsigned)ix & 1u) == 0u) { 70 20564872 : _W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1]; 71 : } 72 : 73 : /* store it */ 74 41129744 : W[ix] = (mp_digit)_W & MP_MASK; 75 : 76 : /* make next carry */ 77 41129744 : W1 = _W >> (mp_word)MP_DIGIT_BIT; 78 : } 79 : 80 : /* setup dest */ 81 698698 : olduse = b->used; 82 698698 : b->used = a->used+a->used; 83 : 84 : { 85 33132 : mp_digit *tmpb; 86 698698 : tmpb = b->dp; 87 41828442 : for (ix = 0; ix < pa; ix++) { 88 41129744 : *tmpb++ = W[ix] & MP_MASK; 89 : } 90 : 91 : /* clear unused digits [that existed in the old copy of c] */ 92 698698 : MP_ZERO_DIGITS(tmpb, olduse - ix); 93 : } 94 698698 : mp_clamp(b); 95 698698 : return MP_OKAY; 96 : } 97 : #endif