C:\Data\fractdev\fractint\fractint\common\bignumc.c

File: common\bignumc.c

    1 /* bignumc.c - C routines equivalent to ASM routines in bignuma.asm */
    2 
    3 /*
    4 Wesley Loewer's Big Numbers.        (C) 1994-95, Wesley B. Loewer
    5 */
    6 
    7 #include <memory.h>
    8   /* see Fractint.c for a description of the "include"  hierarchy */
    9 #include "port.h"
   10 #include "big.h"
   11 
   12 /********************************************************************
   13  The following code contains the C versions of the routines from the
   14  file BIGNUMA.ASM.  It is provided here for portibility and for clarity.
   15 *********************************************************************/
   16 
   17 /********************************************************************
   18  Note:  The C code must be able to detect over/underflow.  Therefore
   19  32 bit integers must be used when doing 16 bit math.  All we really
   20  need is one more bit, such as is provided in asm with the carry bit.
   21  Functions that don't need the test for over/underflow, such as cmp_bn()
   22  and is_bn_not_zero(), can use 32 bit integers as as long as bnstep
   23  is set to 4.
   24 
   25  The 16/32 bit compination of integer sizes could be increased to
   26  32/64 bit to improve efficiency, but since many compilers don't offer
   27  64 bit integers, this option was not included.
   28 
   29 *********************************************************************/
   30 
   31 /********************************************************************/
   32 /* r = 0 */
   33 bn_t clear_bn(bn_t r)
   34     {
   35 #ifdef BIG_BASED
   36     _fmemset( r, 0, bnlength); /* set array to zero */
   37 #else
   38 #ifdef BIG_FAR
   39     _fmemset( r, 0, bnlength); /* set array to zero */
   40 #else
   41     memset( r, 0, bnlength); /* set array to zero */
   42 #endif
   43 #endif
   44     return r;
   45     }
   46 
   47 /********************************************************************/
   48 /* r = max positive value */
   49 bn_t max_bn(bn_t r)
   50     {
   51 #ifdef BIG_BASED
   52     _fmemset( r, 0xFF, bnlength-1); /* set to max values */
   53 #else
   54 #ifdef BIG_FAR
   55     _fmemset( r, 0xFF, bnlength-1); /* set to max values */
   56 #else
   57     memset( r, 0xFF, bnlength-1); /* set to max values */
   58 #endif
   59 #endif
   60     r[bnlength-1] = 0x7F;  /* turn off the sign bit */
   61     return r;
   62     }
   63 
   64 /********************************************************************/
   65 /* r = n */
   66 bn_t copy_bn(bn_t r, bn_t n)
   67     {
   68 #ifdef BIG_BASED
   69     _fmemcpy( r, n, bnlength);
   70 #else
   71 #ifdef BIG_FAR
   72     _fmemcpy( r, n, bnlength);
   73 #else
   74     memcpy( r, n, bnlength);
   75 #endif
   76 #endif
   77     return r;
   78     }
   79 
   80 /***************************************************************************/
   81 /* n1 != n2 ?                                                              */
   82 /* RETURNS:                                                                */
   83 /*  if n1 == n2 returns 0                                                  */
   84 /*  if n1 > n2 returns a positive (bytes left to go when mismatch occured) */
   85 /*  if n1 < n2 returns a negative (bytes left to go when mismatch occured) */
   86 int cmp_bn(bn_t n1, bn_t n2)
   87     {
   88     int i;
   89     S16 Svalue1, Svalue2;
   90     U16 value1, value2;
   91 
   92     /* two bytes at a time */
   93     /* signed comparison for msb */
   94     if ( (Svalue1=big_accessS16((S16 BIGDIST *)(n1+bnlength-2))) >
   95          (Svalue2=big_accessS16((S16 BIGDIST *)(n2+bnlength-2))) )
   96         { /* now determine which of the two bytes was different */
   97         if ( (S16)(Svalue1&0xFF00) > (S16)(Svalue2&0xFF00) ) /* compare just high bytes */
   98             return (bnlength); /* high byte was different */
   99         else
  100             return (bnlength-1); /* low byte was different */
  101         }
  102     else if (Svalue1 < Svalue2)
  103         { /* now determine which of the two bytes was different */
  104         if ( (S16)(Svalue1&0xFF00) < (S16)(Svalue2&0xFF00) ) /* compare just high bytes */
  105             return -(bnlength); /* high byte was different */
  106         else
  107             return -(bnlength-1); /* low byte was different */
  108         }
  109 
  110     /* unsigned comparison for the rest */
  111     for (i=bnlength-4; i>=0; i-=2)
  112         {
  113         if ( (value1=big_access16(n1+i)) > (value2=big_access16(n2+i)) )
  114             { /* now determine which of the two bytes was different */
  115             if ( (value1&0xFF00) > (value2&0xFF00) ) /* compare just high bytes */
  116                 return (i+2); /* high byte was different */
  117             else
  118                 return (i+1); /* low byte was different */
  119             }
  120         else if (value1 < value2)
  121             { /* now determine which of the two bytes was different */
  122             if ( (value1&0xFF00) < (value2&0xFF00) ) /* compare just high bytes */
  123                 return -(i+2); /* high byte was different */
  124             else
  125                 return -(i+1); /* low byte was different */
  126             }
  127         }
  128     return 0;
  129     }
  130 
  131 /********************************************************************/
  132 /* r < 0 ?                                      */
  133 /* returns 1 if negative, 0 if positive or zero */
  134 int is_bn_neg(bn_t n)
  135     {
  136     return (S8)n[bnlength-1] < 0;
  137     }
  138 
  139 /********************************************************************/
  140 /* n != 0 ?                      */
  141 /* RETURNS: if n != 0 returns 1  */
  142 /*          else returns 0       */
  143 int is_bn_not_zero(bn_t n)
  144     {
  145     int i;
  146 
  147     /* two bytes at a time */
  148     for (i=0; i<bnlength; i+=2)
  149         if (big_access16(n+i) != 0)
  150             return 1;
  151     return 0;
  152     }
  153 
  154 /********************************************************************/
  155 /* r = n1 + n2 */
  156 bn_t add_bn(bn_t r, bn_t n1, bn_t n2)
  157     {
  158     int i;
  159     U32 sum=0;
  160 
  161     /* two bytes at a time */
  162     for (i=0; i<bnlength; i+=2)
  163         {
  164         sum += (U32)big_access16(n1+i) + (U32)big_access16(n2+i); /* add 'em up */
  165         big_set16(r+i, (U16)sum);   /* store the lower 2 bytes */
  166         sum >>= 16; /* shift the overflow for next time */
  167         }
  168     return r;
  169     }
  170 
  171 /********************************************************************/
  172 /* r += n */
  173 bn_t add_a_bn(bn_t r, bn_t n)
  174     {
  175     int i;
  176     U32 sum=0;
  177 
  178     /* two bytes at a time */
  179     for (i=0; i<bnlength; i+=2)
  180         {
  181         sum += (U32)big_access16(r+i) + (U32)big_access16(n+i); /* add 'em up */
  182         big_set16(r+i, (U16)sum);   /* store the lower 2 bytes */
  183         sum >>= 16; /* shift the overflow for next time */
  184         }
  185     return r;
  186     }
  187 
  188 /********************************************************************/
  189 /* r = n1 - n2 */
  190 bn_t sub_bn(bn_t r, bn_t n1, bn_t n2)
  191     {
  192     int i;
  193     U32 diff=0;
  194 
  195     /* two bytes at a time */
  196     for (i=0; i<bnlength; i+=2)
  197         {
  198         diff = (U32)big_access16(n1+i) - ((U32)big_access16(n2+i)-(S32)(S16)diff); /* subtract with borrow */
  199         big_set16(r+i, (U16)diff);   /* store the lower 2 bytes */
  200         diff >>= 16; /* shift the underflow for next time */
  201         }
  202     return r;
  203     }
  204 
  205 /********************************************************************/
  206 /* r -= n */
  207 bn_t sub_a_bn(bn_t r, bn_t n)
  208     {
  209     int i;
  210     U32 diff=0;
  211 
  212     /* two bytes at a time */
  213     for (i=0; i<bnlength; i+=2)
  214         {
  215         diff = (U32)big_access16(r+i) - ((U32)big_access16(n+i)-(S32)(S16)diff); /* subtract with borrow */
  216         big_set16(r+i, (U16)diff);   /* store the lower 2 bytes */
  217         diff >>= 16; /* shift the underflow for next time */
  218         }
  219     return r;
  220     }
  221 
  222 /********************************************************************/
  223 /* r = -n */
  224 bn_t neg_bn(bn_t r, bn_t n)
  225     {
  226     int i;
  227     U16 t_short;
  228     U32 neg=1; /* to get the 2's complement started */
  229 
  230     /* two bytes at a time */
  231     for (i=0; neg != 0 && i<bnlength; i+=2)
  232         {
  233         t_short = ~big_access16(n+i);
  234         neg += ((U32)t_short); /* two's complement */
  235         big_set16(r+i, (U16)neg);   /* store the lower 2 bytes */
  236         neg >>= 16; /* shift the sign bit for next time */
  237         }
  238     /* if neg was 0, then just "not" the rest */
  239     for (; i<bnlength; i+=2)
  240         { /* notice that big_access16() and big_set16() are not needed here */
  241         *(U16 BIGDIST *)(r+i) = ~*(U16 BIGDIST *)(n+i); /* toggle all the bits */
  242         }
  243     return r;
  244     }
  245 
  246 /********************************************************************/
  247 /* r *= -1 */
  248 bn_t neg_a_bn(bn_t r)
  249     {
  250     int i;
  251     U16 t_short;
  252     U32 neg=1; /* to get the 2's complement started */
  253 
  254     /* two bytes at a time */
  255     for (i=0; neg != 0 && i<bnlength; i+=2)
  256         {
  257         t_short = ~big_access16(r+i);
  258         neg += ((U32)t_short); /* two's complement */
  259         big_set16(r+i, (U16)neg);   /* store the lower 2 bytes */
  260         neg >>= 16; /* shift the sign bit for next time */
  261         }
  262     /* if neg was 0, then just "not" the rest */
  263     for (; i<bnlength; i+=2)
  264         { /* notice that big_access16() and big_set16() are not needed here */
  265         *(U16 BIGDIST *)(r+i) = ~*(U16 BIGDIST *)(r+i); /* toggle all the bits */
  266         }
  267     return r;
  268     }
  269 
  270 /********************************************************************/
  271 /* r = 2*n */
  272 bn_t double_bn(bn_t r, bn_t n)
  273     {
  274     int i;
  275     U32 prod=0;
  276 
  277     /* two bytes at a time */
  278     for (i=0; i<bnlength; i+=2)
  279         {
  280         prod += (U32)big_access16(n+i)<<1 ; /* double it */
  281         big_set16(r+i, (U16)prod);   /* store the lower 2 bytes */
  282         prod >>= 16; /* shift the overflow for next time */
  283         }
  284     return r;
  285     }
  286 
  287 /********************************************************************/
  288 /* r *= 2 */
  289 bn_t double_a_bn(bn_t r)
  290     {
  291     int i;
  292     U32 prod=0;
  293 
  294     /* two bytes at a time */
  295     for (i=0; i<bnlength; i+=2)
  296         {
  297         prod += (U32)big_access16(r+i)<<1 ; /* double it */
  298         big_set16(r+i, (U16)prod);   /* store the lower 2 bytes */
  299         prod >>= 16; /* shift the overflow for next time */
  300         }
  301     return r;
  302     }
  303 
  304 /********************************************************************/
  305 /* r = n/2 */
  306 bn_t half_bn(bn_t r, bn_t n)
  307     {
  308     int i;
  309     U32 quot=0;
  310 
  311     /* two bytes at a time */
  312 
  313     /* start with an arithmetic shift */
  314     i=bnlength-2;
  315     quot += (U32)(((S32)(S16)big_access16(n+i)<<16)>>1) ; /* shift to upper 2 bytes and half it */
  316     big_set16(r+i, (U16)(quot>>16));   /* store the upper 2 bytes */
  317     quot <<= 16; /* shift the underflow for next time */
  318 
  319     for (i=bnlength-4; i>=0; i-=2)
  320         {
  321         /* looks wierd, but properly sign extends argument */
  322         quot += (U32)(((U32)big_access16(n+i)<<16)>>1) ; /* shift to upper 2 bytes and half it */
  323         big_set16(r+i, (U16)(quot>>16));   /* store the upper 2 bytes */
  324         quot <<= 16; /* shift the underflow for next time */
  325         }
  326 
  327     return r;
  328     }
  329 
  330 /********************************************************************/
  331 /* r /= 2 */
  332 bn_t half_a_bn(bn_t r)
  333     {
  334     int i;
  335     U32 quot=0;
  336 
  337     /* two bytes at a time */
  338 
  339     /* start with an arithmetic shift */
  340     i=bnlength-2;
  341     quot += (U32)(((S32)(S16)big_access16(r+i)<<16)>>1) ; /* shift to upper 2 bytes and half it */
  342     big_set16(r+i, (U16)(quot>>16));   /* store the upper 2 bytes */
  343     quot <<= 16; /* shift the underflow for next time */
  344 
  345     for (i=bnlength-4; i>=0; i-=2)
  346         {
  347         /* looks wierd, but properly sign extends argument */
  348         quot += (U32)(((U32)(U16)big_access16(r+i)<<16)>>1) ; /* shift to upper 2 bytes and half it */
  349         big_set16(r+i, (U16)(quot>>16));   /* store the upper 2 bytes */
  350         quot <<= 16; /* shift the underflow for next time */
  351         }
  352     return r;
  353     }
  354 
  355 /************************************************************************/
  356 /* r = n1 * n2                                                          */
  357 /* Note: r will be a double wide result, 2*bnlength                     */
  358 /*       n1 and n2 can be the same pointer                              */
  359 /* SIDE-EFFECTS: n1 and n2 are changed to their absolute values         */
  360 bn_t unsafe_full_mult_bn(bn_t r, bn_t n1, bn_t n2)
  361     {
  362     int sign1, sign2 = 0, samevar;
  363     int i, j, k, steps, doublesteps, carry_steps;
  364     bn_t n1p, n2p;      /* pointers for n1, n2 */
  365     bn_t rp1, rp2, rp3; /* pointers for r */
  366     U32 prod, sum;
  367 
  368     if ((sign1 = is_bn_neg(n1)) != 0) /* =, not == */
  369         neg_a_bn(n1);
  370         samevar = (n1 == n2);
  371         if (!samevar) /* check to see if they're the same pointer */
  372         if ((sign2 = is_bn_neg(n2)) != 0) /* =, not == */
  373             neg_a_bn(n2);
  374 
  375     n1p = n1;
  376     steps = bnlength>>1; /* two bytes at a time */
  377     carry_steps = doublesteps = (steps<<1) - 2;
  378     bnlength <<= 1;
  379     clear_bn(r);        /* double width */
  380     bnlength >>= 1;
  381     rp1 = rp2 = r;
  382     for (i = 0; i < steps; i++)
  383         {
  384         n2p = n2;
  385         for (j = 0; j < steps; j++)
  386             {
  387             prod = (U32)big_access16(n1p) * (U32)big_access16(n2p); /* U16*U16=U32 */
  388             sum = (U32)big_access16(rp2) + prod; /* add to previous, including overflow */
  389             big_set16(rp2, (U16)sum); /* save the lower 2 bytes */
  390             sum >>= 16;             /* keep just the upper 2 bytes */
  391             rp3 = rp2 + 2;          /* move over 2 bytes */
  392             sum += big_access16(rp3);     /* add what was the upper two bytes */
  393             big_set16(rp3 ,(U16)sum); /* save what was the upper two bytes */
  394             sum >>= 16;             /* keep just the overflow */
  395             for (k=0; sum != 0 && k<carry_steps; k++)
  396                 {
  397                 rp3 += 2;               /* move over 2 bytes */
  398                 sum += big_access16(rp3);     /* add to what was the overflow */
  399                 big_set16(rp3, (U16)sum); /* save what was the overflow */
  400                 sum >>= 16;             /* keep just the new overflow */
  401                 }
  402             n2p += 2;       /* to next word */
  403             rp2 += 2;
  404             carry_steps--;  /* use one less step */
  405             }
  406         n1p += 2;           /* to next word */
  407         rp2 = rp1 += 2;
  408         carry_steps = --doublesteps; /* decrease doubles steps and reset carry_steps */
  409         }
  410 
  411     /* if they were the same or same sign, the product must be positive */
  412     if (!samevar && sign1 != sign2)
  413         {
  414         bnlength <<= 1;         /* for a double wide number */
  415         neg_a_bn(r);
  416         bnlength >>= 1; /* restore bnlength */
  417         }
  418     return r;
  419     }
  420 
  421 /************************************************************************/
  422 /* r = n1 * n2 calculating only the top rlength bytes                   */
  423 /* Note: r will be of length rlength                                    */
  424 /*       2*bnlength <= rlength < bnlength                               */
  425 /*       n1 and n2 can be the same pointer                              */
  426 /* SIDE-EFFECTS: n1 and n2 are changed to their absolute values         */
  427 bn_t unsafe_mult_bn(bn_t r, bn_t n1, bn_t n2)
  428     {
  429     int sign1, sign2 = 0, samevar;
  430     int i, j, k, steps, doublesteps, carry_steps, skips;
  431     bn_t n1p, n2p;      /* pointers for n1, n2 */
  432     bn_t rp1, rp2, rp3; /* pointers for r */
  433     U32 prod, sum;
  434     int bnl; /* temp bnlength holder */
  435 
  436     bnl = bnlength;
  437     if ((sign1 = is_bn_neg(n1)) != 0) /* =, not == */
  438         neg_a_bn(n1);
  439         samevar = (n1 == n2);
  440         if (!samevar) /* check to see if they're the same pointer */
  441         if ((sign2 = is_bn_neg(n2)) != 0) /* =, not == */
  442             neg_a_bn(n2);
  443     n1p = n1;
  444     n2 += (bnlength<<1) - rlength;  /* shift n2 over to where it is needed */
  445 
  446     bnlength = rlength;
  447     clear_bn(r);        /* zero out r, rlength width */
  448     bnlength = bnl;
  449 
  450     steps = (rlength-bnlength)>>1;
  451     skips = (bnlength>>1) - steps;
  452     carry_steps = doublesteps = (rlength>>1)-2;
  453     rp2 = rp1 = r;
  454     for (i=bnlength>>1; i>0; i--)
  455         {
  456         n2p = n2;
  457         for (j=0; j<steps; j++)
  458             {
  459             prod = (U32)big_access16(n1p) * (U32)big_access16(n2p); /* U16*U16=U32 */
  460             sum = (U32)big_access16(rp2) + prod; /* add to previous, including overflow */
  461             big_set16(rp2, (U16)sum); /* save the lower 2 bytes */
  462             sum >>= 16;             /* keep just the upper 2 bytes */
  463             rp3 = rp2 + 2;          /* move over 2 bytes */
  464             sum += big_access16(rp3);     /* add what was the upper two bytes */
  465             big_set16(rp3, (U16)sum); /* save what was the upper two bytes */
  466             sum >>= 16;             /* keep just the overflow */
  467             for (k=0; sum != 0 && k<carry_steps; k++)
  468                 {
  469                 rp3 += 2;               /* move over 2 bytes */
  470                 sum += big_access16(rp3);     /* add to what was the overflow */
  471                 big_set16(rp3, (U16)sum); /* save what was the overflow */
  472                 sum >>= 16;             /* keep just the new overflow */
  473                 }
  474             n2p += 2;                   /* increase by two bytes */
  475             rp2 += 2;
  476             carry_steps--;
  477             }
  478         n1p += 2;   /* increase by two bytes */
  479 
  480         if (skips != 0)
  481             {
  482             n2 -= 2;    /* shift n2 back a word */
  483             steps++;    /* one more step this time */
  484             skips--;    /* keep track of how many times we've done this */
  485             }
  486         else
  487             {
  488             rp1 += 2;           /* shift forward a word */
  489             doublesteps--;      /* reduce the carry steps needed next time */
  490             }
  491         rp2 = rp1;
  492         carry_steps = doublesteps;
  493         }
  494 
  495     /* if they were the same or same sign, the product must be positive */
  496     if (!samevar && sign1 != sign2)
  497         {
  498         bnlength = rlength;
  499         neg_a_bn(r);            /* wider bignumber */
  500         bnlength = bnl;
  501         }
  502     return r;
  503     }
  504 
  505 /************************************************************************/
  506 /* r = n^2                                                              */
  507 /*   because of the symetry involved, n^2 is much faster than n*n       */
  508 /*   for a bignumber of length l                                        */
  509 /*      n*n takes l^2 multiplications                                   */
  510 /*      n^2 takes (l^2+l)/2 multiplications                             */
  511 /*          which is about 1/2 n*n as l gets large                      */
  512 /*  uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...)*/
  513 /*                                                                      */
  514 /* SIDE-EFFECTS: n is changed to its absolute value                     */
  515 bn_t unsafe_full_square_bn(bn_t r, bn_t n)
  516     {
  517     int i, j, k, steps, doublesteps, carry_steps;
  518     bn_t n1p, n2p;
  519     bn_t rp1, rp2, rp3;
  520     U32 prod, sum;
  521 
  522     if (is_bn_neg(n))  /* don't need to keep track of sign since the */
  523         neg_a_bn(n);   /* answer must be positive. */
  524 
  525     bnlength <<= 1;
  526     clear_bn(r);        /* zero out r, double width */
  527     bnlength >>= 1;
  528 
  529     steps = (bnlength>>1)-1;
  530     carry_steps = doublesteps = (steps<<1) - 1;
  531     rp2 = rp1 = r + 2;  /* start with second two-byte word */
  532     n1p = n;
  533     if (steps != 0) /* if zero, then skip all the middle term calculations */
  534         {
  535         for (i=steps; i>0; i--) /* steps gets altered, count backwards */
  536             {
  537             n2p = n1p + 2;  /* set n2p pointer to 1 step beyond n1p */
  538             for (j=0; j<steps; j++)
  539                 {
  540                 prod = (U32)big_access16(n1p) * (U32)big_access16(n2p); /* U16*U16=U32 */
  541                 sum = (U32)big_access16(rp2) + prod; /* add to previous, including overflow */
  542                 big_set16(rp2, (U16)sum); /* save the lower 2 bytes */
  543                 sum >>= 16;             /* keep just the upper 2 bytes */
  544                 rp3 = rp2 + 2;          /* move over 2 bytes */
  545                 sum += big_access16(rp3);     /* add what was the upper two bytes */
  546                 big_set16(rp3, (U16)sum); /* save what was the upper two bytes */
  547                 sum >>= 16;             /* keep just the overflow */
  548                 for (k=0; sum != 0 && k<carry_steps; k++)
  549                     {
  550                     rp3 += 2;               /* move over 2 bytes */
  551                     sum += big_access16(rp3);     /* add to what was the overflow */
  552                     big_set16(rp3, (U16)sum); /* save what was the overflow */
  553                     sum >>= 16;             /* keep just the new overflow */
  554                     }
  555                 n2p += 2;       /* increase by two bytes */
  556                 rp2 += 2;
  557                 carry_steps--;
  558                 }
  559             n1p += 2;           /* increase by two bytes */
  560             rp2 = rp1 += 4;     /* increase by 2 * two bytes */
  561             carry_steps = doublesteps -= 2;   /* reduce the carry steps needed */
  562             steps--;
  563             }
  564         /* All the middle terms have been multiplied.  Now double it. */
  565         bnlength <<= 1;     /* double wide bignumber */
  566         double_a_bn(r);
  567         bnlength >>= 1;
  568         /* finished with middle terms */
  569         }
  570 
  571     /* Now go back and add in the squared terms. */
  572     n1p = n;
  573     steps = (bnlength>>1);
  574     carry_steps = doublesteps = (steps<<1) - 2;
  575     rp1 = r;
  576     for (i=0; i<steps; i++)
  577         {
  578         /* square it */
  579         prod = (U32)big_access16(n1p) * (U32)big_access16(n1p); /* U16*U16=U32 */
  580         sum = (U32)big_access16(rp1) + prod; /* add to previous, including overflow */
  581         big_set16(rp1, (U16)sum); /* save the lower 2 bytes */
  582         sum >>= 16;             /* keep just the upper 2 bytes */
  583         rp3 = rp1 + 2;          /* move over 2 bytes */
  584         sum += big_access16(rp3);     /* add what was the upper two bytes */
  585         big_set16(rp3, (U16)sum); /* save what was the upper two bytes */
  586         sum >>= 16;             /* keep just the overflow */
  587         for (k=0; sum != 0 && k<carry_steps; k++)
  588             {
  589             rp3 += 2;               /* move over 2 bytes */
  590             sum += big_access16(rp3);     /* add to what was the overflow */
  591             big_set16(rp3, (U16)sum); /* save what was the overflow */
  592             sum >>= 16;             /* keep just the new overflow */
  593             }
  594         n1p += 2;       /* increase by 2 bytes */
  595         rp1 += 4;       /* increase by 4 bytes */
  596         carry_steps = doublesteps -= 2;
  597         }
  598     return r;
  599     }
  600 
  601 
  602 /************************************************************************/
  603 /* r = n^2                                                              */
  604 /*   because of the symetry involved, n^2 is much faster than n*n       */
  605 /*   for a bignumber of length l                                        */
  606 /*      n*n takes l^2 multiplications                                   */
  607 /*      n^2 takes (l^2+l)/2 multiplications                             */
  608 /*          which is about 1/2 n*n as l gets large                      */
  609 /*  uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...)*/
  610 /*                                                                      */
  611 /* Note: r will be of length rlength                                    */
  612 /*       2*bnlength >= rlength > bnlength                               */
  613 /* SIDE-EFFECTS: n is changed to its absolute value                     */
  614 bn_t unsafe_square_bn(bn_t r, bn_t n)
  615     {
  616     int i, j, k, steps, doublesteps, carry_steps;
  617     int skips, rodd;
  618     bn_t n1p, n2p, n3p;
  619     bn_t rp1, rp2, rp3;
  620     U32 prod, sum;
  621     int bnl;
  622 
  623 /* This whole procedure would be a great deal simpler if we could assume that */
  624 /* rlength < 2*bnlength (that is, not =).  Therefore, we will take the        */
  625 /* easy way out and call full_square_bn() if it is.                           */
  626     if (rlength == (bnlength<<1)) /* rlength == 2*bnlength */
  627         return unsafe_full_square_bn(r, n);    /* call full_square_bn() and quit */
  628 
  629     if (is_bn_neg(n))  /* don't need to keep track of sign since the */
  630         neg_a_bn(n);   /* answer must be positive. */
  631 
  632     bnl = bnlength;
  633     bnlength = rlength;
  634     clear_bn(r);        /* zero out r, of width rlength */
  635     bnlength = bnl;
  636 
  637     /* determine whether r is on an odd or even two-byte word in the number */
  638     rodd = (U16)(((bnlength<<1)-rlength)>>1) & 0x0001;
  639     i = (bnlength>>1)-1;
  640     steps = (rlength-bnlength)>>1;
  641     carry_steps = doublesteps = (bnlength>>1)+steps-2;
  642     skips = (i - steps)>>1;     /* how long to skip over pointer shifts */
  643     rp2 = rp1 = r;
  644     n1p = n;
  645     n3p = n2p = n1p + (((bnlength>>1)-steps)<<1);    /* n2p = n1p + 2*(bnlength/2 - steps) */
  646     if (i != 0) /* if zero, skip middle term calculations */
  647         {
  648         /* i is already set */
  649         for (; i>0; i--)
  650             {
  651             for (j=0; j<steps; j++)
  652                 {
  653                 prod = (U32)big_access16(n1p) * (U32)big_access16(n2p); /* U16*U16=U32 */
  654                 sum = (U32)big_access16(rp2) + prod; /* add to previous, including overflow */
  655                 big_set16(rp2, (U16)sum); /* save the lower 2 bytes */
  656                 sum >>= 16;             /* keep just the upper 2 bytes */
  657                 rp3 = rp2 + 2;          /* move over 2 bytes */
  658                 sum += big_access16(rp3);     /* add what was the upper two bytes */
  659                 big_set16(rp3, (U16)sum); /* save what was the upper two bytes */
  660                 sum >>= 16;             /* keep just the overflow */
  661                 for (k=0; sum != 0 && k<carry_steps; k++)
  662                     {
  663                     rp3 += 2;               /* move over 2 bytes */
  664                     sum += big_access16(rp3);     /* add to what was the overflow */
  665                     big_set16(rp3, (U16)sum); /* save what was the overflow */
  666                     sum >>= 16;             /* keep just the new overflow */
  667                     }
  668                 n2p += 2;       /* increase by 2-byte word size */
  669                 rp2 += 2;
  670                 carry_steps--;
  671                 }
  672             n1p += 2;       /* increase by 2-byte word size */
  673             if (skips > 0)
  674                 {
  675                 n2p = n3p -= 2;
  676                 steps++;
  677                 skips--;
  678                 }
  679             else if (skips == 0)    /* only gets executed once */
  680                 {
  681                 steps -= rodd;  /* rodd is 1 or 0 */
  682                 doublesteps -= rodd+1;
  683                 rp1 += (rodd+1)<<1;
  684                 n2p = n1p+2;
  685                 skips--;
  686                 }
  687             else /* skips < 0 */
  688                 {
  689                 steps--;
  690                 doublesteps -= 2;
  691                 rp1 += 4;           /* add two 2-byte words */
  692                 n2p = n1p + 2;
  693                 }
  694             rp2 = rp1;
  695             carry_steps = doublesteps;
  696             }
  697         /* All the middle terms have been multiplied.  Now double it. */
  698         bnlength = rlength;
  699         double_a_bn(r);
  700         bnlength = bnl;
  701         }
  702     /* Now go back and add in the squared terms. */
  703 
  704     /* be careful, the next dozen or so lines are confusing!       */
  705     /* determine whether r is on an odd or even word in the number */
  706     /* using i as a temporary variable here */
  707     i = (bnlength<<1)-rlength;
  708     rp1 = r + ((U16)i & (U16)0x0002);
  709     i = (U16)((i>>1)+1) & (U16)0xFFFE;
  710     n1p = n + i;
  711     /* i here is no longer a temp var., but will be used as a loop counter */
  712     i = (bnlength - i)>>1;
  713     carry_steps = doublesteps = (i<<1)-2;
  714     /* i is already set */
  715     for (; i>0; i--)
  716         {
  717         /* square it */
  718         prod = (U32)big_access16(n1p) * (U32)big_access16(n1p); /* U16*U16=U32 */
  719         sum = (U32)big_access16(rp1) + prod; /* add to previous, including overflow */
  720         big_set16(rp1, (U16)sum); /* save the lower 2 bytes */
  721         sum >>= 16;             /* keep just the upper 2 bytes */
  722         rp3 = rp1 + 2;          /* move over 2 bytes */
  723         sum += big_access16(rp3);     /* add what was the upper two bytes */
  724         big_set16(rp3, (U16)sum); /* save what was the upper two bytes */
  725         sum >>= 16;             /* keep just the overflow */
  726         for (k=0; sum != 0 && k<carry_steps; k++)
  727             {
  728             rp3 += 2;               /* move over 2 bytes */
  729             sum += big_access16(rp3);     /* add to what was the overflow */
  730             big_set16(rp3, (U16)sum); /* save what was the overflow */
  731             sum >>= 16;             /* keep just the new overflow */
  732             }
  733         n1p += 2;
  734         rp1 += 4;
  735         carry_steps = doublesteps -= 2;
  736         }
  737     return r;
  738     }
  739 
  740 /********************************************************************/
  741 /* r = n * u  where u is an unsigned integer */
  742 bn_t mult_bn_int(bn_t r, bn_t n, U16 u)
  743     {
  744     int i;
  745     U32 prod=0;
  746 
  747     /* two bytes at a time */
  748     for (i=0; i<bnlength; i+=2)
  749         {
  750         prod += (U32)big_access16(n+i) * u ; /* n*u */
  751         big_set16(r+i, (U16)prod);   /* store the lower 2 bytes */
  752         prod >>= 16; /* shift the overflow for next time */
  753         }
  754     return r;
  755     }
  756 
  757 /********************************************************************/
  758 /* r *= u  where u is an unsigned integer */
  759 bn_t mult_a_bn_int(bn_t r, U16 u)
  760     {
  761     int i;
  762     U32 prod=0;
  763 
  764     /* two bytes at a time */
  765     for (i=0; i<bnlength; i+=2)
  766         {
  767         prod += (U32)big_access16(r+i) * u ; /* r*u */
  768         big_set16(r+i, (U16)prod);   /* store the lower 2 bytes */
  769         prod >>= 16; /* shift the overflow for next time */
  770         }
  771     return r;
  772     }
  773 
  774 /********************************************************************/
  775 /* r = n / u  where u is an unsigned integer */
  776 bn_t unsafe_div_bn_int(bn_t r, bn_t n,  U16 u)
  777     {
  778     int i, sign;
  779     U32 full_number;
  780     U16 quot, rem=0;
  781 
  782     sign = is_bn_neg(n);
  783     if (sign)
  784         neg_a_bn(n);
  785 
  786     if (u == 0) /* division by zero */
  787         {
  788         max_bn(r);
  789         if (sign)
  790             neg_a_bn(r);
  791         return r;
  792         }
  793 
  794     /* two bytes at a time */
  795     for (i=bnlength-2; i>=0; i-=2)
  796         {
  797         full_number = ((U32)rem<<16) + (U32)big_access16(n+i);
  798         quot = (U16)(full_number / u);
  799         rem  = (U16)(full_number % u);
  800         big_set16(r+i, quot);
  801         }
  802 
  803     if (sign)
  804         neg_a_bn(r);
  805     return r;
  806     }
  807 
  808 /********************************************************************/
  809 /* r /= u  where u is an unsigned integer */
  810 bn_t div_a_bn_int(bn_t r, U16 u)
  811     {
  812     int i, sign;
  813     U32 full_number;
  814     U16 quot, rem=0;
  815 
  816     sign = is_bn_neg(r);
  817     if (sign)
  818         neg_a_bn(r);
  819 
  820     if (u == 0) /* division by zero */
  821         {
  822         max_bn(r);
  823         if (sign)
  824             neg_a_bn(r);
  825         return r;
  826         }
  827 
  828     /* two bytes at a time */
  829     for (i=bnlength-2; i>=0; i-=2)
  830         {
  831         full_number = ((U32)rem<<16) + (U32)big_access16(r+i);
  832         quot = (U16)(full_number / u);
  833         rem  = (U16)(full_number % u);
  834         big_set16(r+i, quot);
  835         }
  836 
  837     if (sign)
  838         neg_a_bn(r);
  839     return r;
  840     }
  841 
  842 /*********************************************************************/
  843 /*  f = b                                                            */
  844 /*  Converts a bignumber to a double                                 */
  845 LDBL bntofloat(bn_t n)
  846     {
  847     int i;
  848     int signflag=0;
  849     int expon;
  850     bn_t getbyte;
  851     LDBL f=0;
  852 
  853     if (is_bn_neg(n))
  854         {
  855         signflag = 1;
  856         neg_a_bn(n);
  857         }
  858 
  859     expon = intlength - 1;
  860     getbyte = n + bnlength - 1;
  861     while (*getbyte == 0 && getbyte >= n)
  862       {
  863       getbyte--;
  864       expon--;
  865       }
  866 
  867     /* There is no need to use all bnlength bytes.  To get the full */
  868     /* precision of LDBL, all you need is LDBL_MANT_DIG/8+1.        */
  869     for (i = 0; i < (LDBL_MANT_DIG/8+1) && getbyte >= n; i++, getbyte--)
  870         {
  871         f += scale_256(*getbyte,-i);
  872         }
  873 
  874     f = scale_256(f,expon);
  875 
  876     if (signflag)
  877         {
  878         f = -f;
  879         neg_a_bn(n);
  880         }
  881     return f;
  882     }
  883 
  884 /*****************************************/
  885 /* the following used to be in bigfltc.c */
  886 
  887 /********************************************************************/
  888 /* r = 0 */
  889 bf_t clear_bf(bf_t r)
  890     {
  891     _fmemset( r, 0, bflength+2); /* set array to zero */
  892     return r;
  893     }
  894 
  895 /********************************************************************/
  896 /* r = n */
  897 bf_t copy_bf(bf_t r, bf_t n)
  898     {
  899     _fmemcpy( r, n, bflength+2);
  900     return r;
  901     }
  902 
  903 /*********************************************************************/
  904 /*  b = f                                                            */
  905 /*  Converts a double to a bigfloat                                  */
  906 bf_t floattobf(bf_t r, LDBL f)
  907     {
  908     int power;
  909     int bnl, il;
  910     if (f == 0)
  911         {
  912         clear_bf(r);
  913         return r;
  914         }
  915 
  916     /* remove the exp part */
  917     f = extract_256(f, &power);
  918 
  919     bnl = bnlength;
  920     bnlength = bflength;
  921     il = intlength;
  922     intlength = 2;
  923     floattobn(r, f);
  924     bnlength = bnl;
  925     intlength = il;
  926 
  927     big_set16(r + bflength, (S16)power); /* exp */
  928 
  929     return r;
  930     }
  931 
  932 /*********************************************************************/
  933 /*  b = f                                                            */
  934 /*  Converts a double to a bigfloat                                  */
  935 bf_t floattobf1(bf_t r, LDBL f)
  936     {
  937     char msg[80];
  938 #ifdef USE_LONG_DOUBLE
  939     sprintf(msg,"%-.22Le",f);
  940 #else
  941     sprintf(msg,"%-.22le",f);
  942 #endif
  943     strtobf(r,msg);
  944     return r;
  945     }
  946 
  947 /*********************************************************************/
  948 /*  f = b                                                            */
  949 /*  Converts a bigfloat to a double                                 */
  950 LDBL bftofloat(bf_t n)
  951     {
  952     int power;
  953     int bnl, il;
  954     LDBL f;
  955 
  956     bnl = bnlength;
  957     bnlength = bflength;
  958     il = intlength;
  959     intlength = 2;
  960     f = bntofloat(n);
  961     bnlength = bnl;
  962     intlength = il;
  963 
  964     power = (S16)big_access16(n + bflength);
  965     f = scale_256(f,power);
  966 
  967     return f;
  968     }
  969 
  970 /********************************************************************/
  971 /* extracts the mantissa and exponent of f                          */
  972 /* finds m and n such that 1<=|m|<256 and f = m*256^n               */
  973 /* n is stored in *exp_ptr and m is returned, sort of like frexp()  */
  974 LDBL extract_256(LDBL f, int *exp_ptr)
  975    {
  976    return extract_value(f, 256, exp_ptr);
  977    }
  978 
  979 /********************************************************************/
  980 /* calculates and returns the value of f*256^n                      */
  981 /* sort of like ldexp()                                             */
  982 /*                                                                  */
  983 /* n must be in the range -2^12 <= n < 2^12 (2^12=4096),            */
  984 /* which should not be a problem                                    */
  985 LDBL scale_256( LDBL f, int n )
  986    {
  987    return scale_value( f, 256, n );
  988    }
  989