File: common\bignumc.c

    1 /* bignumc.c - C routines equivalent to ASM routines in bignuma.asm */
    2 
    3 /*
    4 Wesley Loewer's Big Numbers.        (C) 1994-95, Wesley B. Loewer
    5 */
    6 
    7 #include <memory.h>
    8   /* see Fractint.c for a description of the "include"  hierarchy */
    9 #include "port.h"
   10 #include "big.h"
   11 
   12 /********************************************************************
   13  The following code contains the C versions of the routines from the
   14  file BIGNUMA.ASM.  It is provided here for portibility and for clarity.
   15 *********************************************************************/
   16 
   17 /********************************************************************
   18  Note:  The C code must be able to detect over/underflow.  Therefore
   19  32 bit integers must be used when doing 16 bit math.  All we really
   20  need is one more bit, such as is provided in asm with the carry bit.
   21  Functions that don't need the test for over/underflow, such as cmp_bn()
   22  and is_bn_not_zero(), can use 32 bit integers as as long as bnstep
   23  is set to 4.
   24 
   25  The 16/32 bit compination of integer sizes could be increased to
   26  32/64 bit to improve efficiency, but since many compilers don't offer
   27  64 bit integers, this option was not included.
   28 
   29 *********************************************************************/
   30 
   31 /********************************************************************/
   32 /* r = 0 */
   33 bn_t clear_bn(bn_t r)
   34     {
   35 #ifdef BIG_BASED
   36     _fmemset( r, 0, bnlength); /* set array to zero */
37 #else 38 #ifdef BIG_FAR 39 _fmemset( r, 0, bnlength); /* set array to zero */ 40 #else 41 memset( r, 0, bnlength); /* set array to zero */ 42 #endif
43 #endif 44 return r; 45 } 46 47 /********************************************************************/ 48 /* r = max positive value */ 49 bn_t max_bn(bn_t r) 50 { 51 #ifdef BIG_BASED 52 _fmemset( r, 0xFF, bnlength-1); /* set to max values */
53 #else 54 #ifdef BIG_FAR 55 _fmemset( r, 0xFF, bnlength-1); /* set to max values */ 56 #else 57 memset( r, 0xFF, bnlength-1); /* set to max values */ 58 #endif
59 #endif 60 r[bnlength-1] = 0x7F; /* turn off the sign bit */ 61 return r; 62 } 63 64 /********************************************************************/ 65 /* r = n */ 66 bn_t copy_bn(bn_t r, bn_t n) 67 { 68 #ifdef BIG_BASED 69 _fmemcpy( r, n, bnlength);
70 #else 71 #ifdef BIG_FAR 72 _fmemcpy( r, n, bnlength); 73 #else 74 memcpy( r, n, bnlength); 75 #endif
76 #endif 77 return r; 78 } 79 80 /***************************************************************************/ 81 /* n1 != n2 ? */ 82 /* RETURNS: */ 83 /* if n1 == n2 returns 0 */ 84 /* if n1 > n2 returns a positive (bytes left to go when mismatch occured) */ 85 /* if n1 < n2 returns a negative (bytes left to go when mismatch occured) */ 86 int cmp_bn(bn_t n1, bn_t n2) 87 { 88 int i; 89 S16 Svalue1, Svalue2; 90 U16 value1, value2; 91 92 /* two bytes at a time */ 93 /* signed comparison for msb */ 94 if ( (Svalue1=big_accessS16((S16 BIGDIST *)(n1+bnlength-2))) > 95 (Svalue2=big_accessS16((S16 BIGDIST *)(n2+bnlength-2))) ) 96 { /* now determine which of the two bytes was different */ 97 if ( (S16)(Svalue1&0xFF00) > (S16)(Svalue2&0xFF00) ) /* compare just high bytes */ 98 return (bnlength); /* high byte was different */ 99 else 100 return (bnlength-1); /* low byte was different */ 101 } 102 else if (Svalue1 < Svalue2) 103 { /* now determine which of the two bytes was different */ 104 if ( (S16)(Svalue1&0xFF00) < (S16)(Svalue2&0xFF00) ) /* compare just high bytes */ 105 return -(bnlength); /* high byte was different */ 106 else 107 return -(bnlength-1); /* low byte was different */ 108 } 109 110 /* unsigned comparison for the rest */ 111 for (i=bnlength-4; i>=0; i-=2) 112 { 113 if ( (value1=big_access16(n1+i)) > (value2=big_access16(n2+i)) ) 114 { /* now determine which of the two bytes was different */ 115 if ( (value1&0xFF00) > (value2&0xFF00) ) /* compare just high bytes */ 116 return (i+2); /* high byte was different */ 117 else 118 return (i+1); /* low byte was different */ 119 } 120 else if (value1 < value2) 121 { /* now determine which of the two bytes was different */ 122 if ( (value1&0xFF00) < (value2&0xFF00) ) /* compare just high bytes */ 123 return -(i+2); /* high byte was different */ 124 else 125 return -(i+1); /* low byte was different */ 126 } 127 } 128 return 0; 129 } 130 131 /********************************************************************/ 132 /* r < 0 ? */ 133 /* returns 1 if negative, 0 if positive or zero */ 134 int is_bn_neg(bn_t n) 135 { 136 return (S8)n[bnlength-1] < 0; 137 } 138 139 /********************************************************************/ 140 /* n != 0 ? */ 141 /* RETURNS: if n != 0 returns 1 */ 142 /* else returns 0 */ 143 int is_bn_not_zero(bn_t n) 144 { 145 int i; 146 147 /* two bytes at a time */ 148 for (i=0; i<bnlength; i+=2) 149 if (big_access16(n+i) != 0) 150 return 1; 151 return 0; 152 } 153 154 /********************************************************************/ 155 /* r = n1 + n2 */ 156 bn_t add_bn(bn_t r, bn_t n1, bn_t n2) 157 { 158 int i; 159 U32 sum=0; 160 161 /* two bytes at a time */ 162 for (i=0; i<bnlength; i+=2) 163 { 164 sum += (U32)big_access16(n1+i) + (U32)big_access16(n2+i); /* add 'em up */ 165 big_set16(r+i, (U16)sum); /* store the lower 2 bytes */ 166 sum >>= 16; /* shift the overflow for next time */ 167 } 168 return r; 169 } 170 171 /********************************************************************/ 172 /* r += n */ 173 bn_t add_a_bn(bn_t r, bn_t n) 174 { 175 int i; 176 U32 sum=0; 177 178 /* two bytes at a time */ 179 for (i=0; i<bnlength; i+=2) 180 { 181 sum += (U32)big_access16(r+i) + (U32)big_access16(n+i); /* add 'em up */ 182 big_set16(r+i, (U16)sum); /* store the lower 2 bytes */ 183 sum >>= 16; /* shift the overflow for next time */ 184 } 185 return r; 186 } 187 188 /********************************************************************/ 189 /* r = n1 - n2 */ 190 bn_t sub_bn(bn_t r, bn_t n1, bn_t n2) 191 { 192 int i; 193 U32 diff=0; 194 195 /* two bytes at a time */ 196 for (i=0; i<bnlength; i+=2) 197 { 198 diff = (U32)big_access16(n1+i) - ((U32)big_access16(n2+i)-(S32)(S16)diff); /* subtract with borrow */ 199 big_set16(r+i, (U16)diff); /* store the lower 2 bytes */ 200 diff >>= 16; /* shift the underflow for next time */ 201 } 202 return r; 203 } 204 205 /********************************************************************/ 206 /* r -= n */ 207 bn_t sub_a_bn(bn_t r, bn_t n) 208 { 209 int i; 210 U32 diff=0; 211 212 /* two bytes at a time */ 213 for (i=0; i<bnlength; i+=2) 214 { 215 diff = (U32)big_access16(r+i) - ((U32)big_access16(n+i)-(S32)(S16)diff); /* subtract with borrow */ 216 big_set16(r+i, (U16)diff); /* store the lower 2 bytes */ 217 diff >>= 16; /* shift the underflow for next time */ 218 } 219 return r; 220 } 221 222 /********************************************************************/ 223 /* r = -n */ 224 bn_t neg_bn(bn_t r, bn_t n) 225 { 226 int i; 227 U16 t_short; 228 U32 neg=1; /* to get the 2's complement started */ 229 230 /* two bytes at a time */ 231 for (i=0; neg != 0 && i<bnlength; i+=2) 232 { 233 t_short = ~big_access16(n+i); 234 neg += ((U32)t_short); /* two's complement */ 235 big_set16(r+i, (U16)neg); /* store the lower 2 bytes */ 236 neg >>= 16; /* shift the sign bit for next time */ 237 } 238 /* if neg was 0, then just "not" the rest */ 239 for (; i<bnlength; i+=2) 240 { /* notice that big_access16() and big_set16() are not needed here */ 241 *(U16 BIGDIST *)(r+i) = ~*(U16 BIGDIST *)(n+i); /* toggle all the bits */ 242 } 243 return r; 244 } 245 246 /********************************************************************/ 247 /* r *= -1 */ 248 bn_t neg_a_bn(bn_t r) 249 { 250 int i; 251 U16 t_short; 252 U32 neg=1; /* to get the 2's complement started */ 253 254 /* two bytes at a time */ 255 for (i=0; neg != 0 && i<bnlength; i+=2) 256 { 257 t_short = ~big_access16(r+i); 258 neg += ((U32)t_short); /* two's complement */ 259 big_set16(r+i, (U16)neg); /* store the lower 2 bytes */ 260 neg >>= 16; /* shift the sign bit for next time */ 261 } 262 /* if neg was 0, then just "not" the rest */ 263 for (; i<bnlength; i+=2) 264 { /* notice that big_access16() and big_set16() are not needed here */ 265 *(U16 BIGDIST *)(r+i) = ~*(U16 BIGDIST *)(r+i); /* toggle all the bits */ 266 } 267 return r; 268 } 269 270 /********************************************************************/ 271 /* r = 2*n */ 272 bn_t double_bn(bn_t r, bn_t n) 273 { 274 int i; 275 U32 prod=0; 276 277 /* two bytes at a time */ 278 for (i=0; i<bnlength; i+=2) 279 { 280 prod += (U32)big_access16(n+i)<<1 ; /* double it */ 281 big_set16(r+i, (U16)prod); /* store the lower 2 bytes */ 282 prod >>= 16; /* shift the overflow for next time */ 283 } 284 return r; 285 } 286 287 /********************************************************************/ 288 /* r *= 2 */ 289 bn_t double_a_bn(bn_t r) 290 { 291 int i; 292 U32 prod=0; 293 294 /* two bytes at a time */ 295 for (i=0; i<bnlength; i+=2) 296 { 297 prod += (U32)big_access16(r+i)<<1 ; /* double it */ 298 big_set16(r+i, (U16)prod); /* store the lower 2 bytes */ 299 prod >>= 16; /* shift the overflow for next time */ 300 } 301 return r; 302 } 303 304 /********************************************************************/ 305 /* r = n/2 */ 306 bn_t half_bn(bn_t r, bn_t n) 307 { 308 int i; 309 U32 quot=0; 310 311 /* two bytes at a time */ 312 313 /* start with an arithmetic shift */ 314 i=bnlength-2; 315 quot += (U32)(((S32)(S16)big_access16(n+i)<<16)>>1) ; /* shift to upper 2 bytes and half it */ 316 big_set16(r+i, (U16)(quot>>16)); /* store the upper 2 bytes */ 317 quot <<= 16; /* shift the underflow for next time */ 318 319 for (i=bnlength-4; i>=0; i-=2) 320 { 321 /* looks wierd, but properly sign extends argument */ 322 quot += (U32)(((U32)big_access16(n+i)<<16)>>1) ; /* shift to upper 2 bytes and half it */ 323 big_set16(r+i, (U16)(quot>>16)); /* store the upper 2 bytes */ 324 quot <<= 16; /* shift the underflow for next time */ 325 } 326 327 return r; 328 } 329 330 /********************************************************************/ 331 /* r /= 2 */ 332 bn_t half_a_bn(bn_t r) 333 { 334 int i; 335 U32 quot=0; 336 337 /* two bytes at a time */ 338 339 /* start with an arithmetic shift */ 340 i=bnlength-2; 341 quot += (U32)(((S32)(S16)big_access16(r+i)<<16)>>1) ; /* shift to upper 2 bytes and half it */ 342 big_set16(r+i, (U16)(quot>>16)); /* store the upper 2 bytes */ 343 quot <<= 16; /* shift the underflow for next time */ 344 345 for (i=bnlength-4; i>=0; i-=2) 346 { 347 /* looks wierd, but properly sign extends argument */ 348 quot += (U32)(((U32)(U16)big_access16(r+i)<<16)>>1) ; /* shift to upper 2 bytes and half it */ 349 big_set16(r+i, (U16)(quot>>16)); /* store the upper 2 bytes */ 350 quot <<= 16; /* shift the underflow for next time */ 351 } 352 return r; 353 } 354 355 /************************************************************************/ 356 /* r = n1 * n2 */ 357 /* Note: r will be a double wide result, 2*bnlength */ 358 /* n1 and n2 can be the same pointer */ 359 /* SIDE-EFFECTS: n1 and n2 are changed to their absolute values */ 360 bn_t unsafe_full_mult_bn(bn_t r, bn_t n1, bn_t n2) 361 { 362 int sign1, sign2 = 0, samevar; 363 int i, j, k, steps, doublesteps, carry_steps; 364 bn_t n1p, n2p; /* pointers for n1, n2 */ 365 bn_t rp1, rp2, rp3; /* pointers for r */ 366 U32 prod, sum; 367 368 if ((sign1 = is_bn_neg(n1)) != 0) /* =, not == */ 369 neg_a_bn(n1); 370 samevar = (n1 == n2); 371 if (!samevar) /* check to see if they're the same pointer */ 372 if ((sign2 = is_bn_neg(n2)) != 0) /* =, not == */ 373 neg_a_bn(n2); 374 375 n1p = n1; 376 steps = bnlength>>1; /* two bytes at a time */ 377 carry_steps = doublesteps = (steps<<1) - 2; 378 bnlength <<= 1; 379 clear_bn(r); /* double width */ 380 bnlength >>= 1; 381 rp1 = rp2 = r; 382 for (i = 0; i < steps; i++) 383 { 384 n2p = n2; 385 for (j = 0; j < steps; j++) 386 { 387 prod = (U32)big_access16(n1p) * (U32)big_access16(n2p); /* U16*U16=U32 */ 388 sum = (U32)big_access16(rp2) + prod; /* add to previous, including overflow */ 389 big_set16(rp2, (U16)sum); /* save the lower 2 bytes */ 390 sum >>= 16; /* keep just the upper 2 bytes */ 391 rp3 = rp2 + 2; /* move over 2 bytes */ 392 sum += big_access16(rp3); /* add what was the upper two bytes */ 393 big_set16(rp3 ,(U16)sum); /* save what was the upper two bytes */ 394 sum >>= 16; /* keep just the overflow */ 395 for (k=0; sum != 0 && k<carry_steps; k++) 396 { 397 rp3 += 2; /* move over 2 bytes */ 398 sum += big_access16(rp3); /* add to what was the overflow */ 399 big_set16(rp3, (U16)sum); /* save what was the overflow */ 400 sum >>= 16; /* keep just the new overflow */ 401 } 402 n2p += 2; /* to next word */ 403 rp2 += 2; 404 carry_steps--; /* use one less step */ 405 } 406 n1p += 2; /* to next word */ 407 rp2 = rp1 += 2; 408 carry_steps = --doublesteps; /* decrease doubles steps and reset carry_steps */ 409 } 410 411 /* if they were the same or same sign, the product must be positive */ 412 if (!samevar && sign1 != sign2) 413 { 414 bnlength <<= 1; /* for a double wide number */ 415 neg_a_bn(r); 416 bnlength >>= 1; /* restore bnlength */ 417 } 418 return r; 419 } 420 421 /************************************************************************/ 422 /* r = n1 * n2 calculating only the top rlength bytes */ 423 /* Note: r will be of length rlength */ 424 /* 2*bnlength <= rlength < bnlength */ 425 /* n1 and n2 can be the same pointer */ 426 /* SIDE-EFFECTS: n1 and n2 are changed to their absolute values */ 427 bn_t unsafe_mult_bn(bn_t r, bn_t n1, bn_t n2) 428 { 429 int sign1, sign2 = 0, samevar; 430 int i, j, k, steps, doublesteps, carry_steps, skips; 431 bn_t n1p, n2p; /* pointers for n1, n2 */ 432 bn_t rp1, rp2, rp3; /* pointers for r */ 433 U32 prod, sum; 434 int bnl; /* temp bnlength holder */ 435 436 bnl = bnlength; 437 if ((sign1 = is_bn_neg(n1)) != 0) /* =, not == */ 438 neg_a_bn(n1); 439 samevar = (n1 == n2); 440 if (!samevar) /* check to see if they're the same pointer */ 441 if ((sign2 = is_bn_neg(n2)) != 0) /* =, not == */ 442 neg_a_bn(n2); 443 n1p = n1; 444 n2 += (bnlength<<1) - rlength; /* shift n2 over to where it is needed */ 445 446 bnlength = rlength; 447 clear_bn(r); /* zero out r, rlength width */ 448 bnlength = bnl; 449 450 steps = (rlength-bnlength)>>1; 451 skips = (bnlength>>1) - steps; 452 carry_steps = doublesteps = (rlength>>1)-2; 453 rp2 = rp1 = r; 454 for (i=bnlength>>1; i>0; i--) 455 { 456 n2p = n2; 457 for (j=0; j<steps; j++) 458 { 459 prod = (U32)big_access16(n1p) * (U32)big_access16(n2p); /* U16*U16=U32 */ 460 sum = (U32)big_access16(rp2) + prod; /* add to previous, including overflow */ 461 big_set16(rp2, (U16)sum); /* save the lower 2 bytes */ 462 sum >>= 16; /* keep just the upper 2 bytes */ 463 rp3 = rp2 + 2; /* move over 2 bytes */ 464 sum += big_access16(rp3); /* add what was the upper two bytes */ 465 big_set16(rp3, (U16)sum); /* save what was the upper two bytes */ 466 sum >>= 16; /* keep just the overflow */ 467 for (k=0; sum != 0 && k<carry_steps; k++) 468 { 469 rp3 += 2; /* move over 2 bytes */ 470 sum += big_access16(rp3); /* add to what was the overflow */ 471 big_set16(rp3, (U16)sum); /* save what was the overflow */ 472 sum >>= 16; /* keep just the new overflow */ 473 } 474 n2p += 2; /* increase by two bytes */ 475 rp2 += 2; 476 carry_steps--; 477 } 478 n1p += 2; /* increase by two bytes */ 479 480 if (skips != 0) 481 { 482 n2 -= 2; /* shift n2 back a word */ 483 steps++; /* one more step this time */ 484 skips--; /* keep track of how many times we've done this */ 485 } 486 else 487 { 488 rp1 += 2; /* shift forward a word */ 489 doublesteps--; /* reduce the carry steps needed next time */ 490 } 491 rp2 = rp1; 492 carry_steps = doublesteps; 493 } 494 495 /* if they were the same or same sign, the product must be positive */ 496 if (!samevar && sign1 != sign2) 497 { 498 bnlength = rlength; 499 neg_a_bn(r); /* wider bignumber */ 500 bnlength = bnl; 501 } 502 return r; 503 } 504 505 /************************************************************************/ 506 /* r = n^2 */ 507 /* because of the symetry involved, n^2 is much faster than n*n */ 508 /* for a bignumber of length l */ 509 /* n*n takes l^2 multiplications */ 510 /* n^2 takes (l^2+l)/2 multiplications */ 511 /* which is about 1/2 n*n as l gets large */ 512 /* uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...)*/ 513 /* */ 514 /* SIDE-EFFECTS: n is changed to its absolute value */ 515 bn_t unsafe_full_square_bn(bn_t r, bn_t n) 516 { 517 int i, j, k, steps, doublesteps, carry_steps; 518 bn_t n1p, n2p; 519 bn_t rp1, rp2, rp3; 520 U32 prod, sum; 521 522 if (is_bn_neg(n)) /* don't need to keep track of sign since the */ 523 neg_a_bn(n); /* answer must be positive. */ 524 525 bnlength <<= 1; 526 clear_bn(r); /* zero out r, double width */ 527 bnlength >>= 1; 528 529 steps = (bnlength>>1)-1; 530 carry_steps = doublesteps = (steps<<1) - 1; 531 rp2 = rp1 = r + 2; /* start with second two-byte word */ 532 n1p = n; 533 if (steps != 0) /* if zero, then skip all the middle term calculations */ 534 { 535 for (i=steps; i>0; i--) /* steps gets altered, count backwards */ 536 { 537 n2p = n1p + 2; /* set n2p pointer to 1 step beyond n1p */ 538 for (j=0; j<steps; j++) 539 { 540 prod = (U32)big_access16(n1p) * (U32)big_access16(n2p); /* U16*U16=U32 */ 541 sum = (U32)big_access16(rp2) + prod; /* add to previous, including overflow */ 542 big_set16(rp2, (U16)sum); /* save the lower 2 bytes */ 543 sum >>= 16; /* keep just the upper 2 bytes */ 544 rp3 = rp2 + 2; /* move over 2 bytes */ 545 sum += big_access16(rp3); /* add what was the upper two bytes */ 546 big_set16(rp3, (U16)sum); /* save what was the upper two bytes */ 547 sum >>= 16; /* keep just the overflow */ 548 for (k=0; sum != 0 && k<carry_steps; k++) 549 { 550 rp3 += 2; /* move over 2 bytes */ 551 sum += big_access16(rp3); /* add to what was the overflow */ 552 big_set16(rp3, (U16)sum); /* save what was the overflow */ 553 sum >>= 16; /* keep just the new overflow */ 554 } 555 n2p += 2; /* increase by two bytes */ 556 rp2 += 2; 557 carry_steps--; 558 } 559 n1p += 2; /* increase by two bytes */ 560 rp2 = rp1 += 4; /* increase by 2 * two bytes */ 561 carry_steps = doublesteps -= 2; /* reduce the carry steps needed */ 562 steps--; 563 } 564 /* All the middle terms have been multiplied. Now double it. */ 565 bnlength <<= 1; /* double wide bignumber */ 566 double_a_bn(r); 567 bnlength >>= 1; 568 /* finished with middle terms */ 569 } 570 571 /* Now go back and add in the squared terms. */ 572 n1p = n; 573 steps = (bnlength>>1); 574 carry_steps = doublesteps = (steps<<1) - 2; 575 rp1 = r; 576 for (i=0; i<steps; i++) 577 { 578 /* square it */ 579 prod = (U32)big_access16(n1p) * (U32)big_access16(n1p); /* U16*U16=U32 */ 580 sum = (U32)big_access16(rp1) + prod; /* add to previous, including overflow */ 581 big_set16(rp1, (U16)sum); /* save the lower 2 bytes */ 582 sum >>= 16; /* keep just the upper 2 bytes */ 583 rp3 = rp1 + 2; /* move over 2 bytes */ 584 sum += big_access16(rp3); /* add what was the upper two bytes */ 585 big_set16(rp3, (U16)sum); /* save what was the upper two bytes */ 586 sum >>= 16; /* keep just the overflow */ 587 for (k=0; sum != 0 && k<carry_steps; k++) 588 { 589 rp3 += 2; /* move over 2 bytes */ 590 sum += big_access16(rp3); /* add to what was the overflow */ 591 big_set16(rp3, (U16)sum); /* save what was the overflow */ 592 sum >>= 16; /* keep just the new overflow */ 593 } 594 n1p += 2; /* increase by 2 bytes */ 595 rp1 += 4; /* increase by 4 bytes */ 596 carry_steps = doublesteps -= 2; 597 } 598 return r; 599 } 600 601 602 /************************************************************************/ 603 /* r = n^2 */ 604 /* because of the symetry involved, n^2 is much faster than n*n */ 605 /* for a bignumber of length l */ 606 /* n*n takes l^2 multiplications */ 607 /* n^2 takes (l^2+l)/2 multiplications */ 608 /* which is about 1/2 n*n as l gets large */ 609 /* uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...)*/ 610 /* */ 611 /* Note: r will be of length rlength */ 612 /* 2*bnlength >= rlength > bnlength */ 613 /* SIDE-EFFECTS: n is changed to its absolute value */ 614 bn_t unsafe_square_bn(bn_t r, bn_t n) 615 { 616 int i, j, k, steps, doublesteps, carry_steps; 617 int skips, rodd; 618 bn_t n1p, n2p, n3p; 619 bn_t rp1, rp2, rp3; 620 U32 prod, sum; 621 int bnl; 622 623 /* This whole procedure would be a great deal simpler if we could assume that */ 624 /* rlength < 2*bnlength (that is, not =). Therefore, we will take the */ 625 /* easy way out and call full_square_bn() if it is. */ 626 if (rlength == (bnlength<<1)) /* rlength == 2*bnlength */ 627 return unsafe_full_square_bn(r, n); /* call full_square_bn() and quit */ 628 629 if (is_bn_neg(n)) /* don't need to keep track of sign since the */ 630 neg_a_bn(n); /* answer must be positive. */ 631 632 bnl = bnlength; 633 bnlength = rlength; 634 clear_bn(r); /* zero out r, of width rlength */ 635 bnlength = bnl; 636 637 /* determine whether r is on an odd or even two-byte word in the number */ 638 rodd = (U16)(((bnlength<<1)-rlength)>>1) & 0x0001; 639 i = (bnlength>>1)-1; 640 steps = (rlength-bnlength)>>1; 641 carry_steps = doublesteps = (bnlength>>1)+steps-2; 642 skips = (i - steps)>>1; /* how long to skip over pointer shifts */ 643 rp2 = rp1 = r; 644 n1p = n; 645 n3p = n2p = n1p + (((bnlength>>1)-steps)<<1); /* n2p = n1p + 2*(bnlength/2 - steps) */ 646 if (i != 0) /* if zero, skip middle term calculations */ 647 { 648 /* i is already set */ 649 for (; i>0; i--) 650 { 651 for (j=0; j<steps; j++) 652 { 653 prod = (U32)big_access16(n1p) * (U32)big_access16(n2p); /* U16*U16=U32 */ 654 sum = (U32)big_access16(rp2) + prod; /* add to previous, including overflow */ 655 big_set16(rp2, (U16)sum); /* save the lower 2 bytes */ 656 sum >>= 16; /* keep just the upper 2 bytes */ 657 rp3 = rp2 + 2; /* move over 2 bytes */ 658 sum += big_access16(rp3); /* add what was the upper two bytes */ 659 big_set16(rp3, (U16)sum); /* save what was the upper two bytes */ 660 sum >>= 16; /* keep just the overflow */ 661 for (k=0; sum != 0 && k<carry_steps; k++) 662 { 663 rp3 += 2; /* move over 2 bytes */ 664 sum += big_access16(rp3); /* add to what was the overflow */ 665 big_set16(rp3, (U16)sum); /* save what was the overflow */ 666 sum >>= 16; /* keep just the new overflow */ 667 } 668 n2p += 2; /* increase by 2-byte word size */ 669 rp2 += 2; 670 carry_steps--; 671 } 672 n1p += 2; /* increase by 2-byte word size */ 673 if (skips > 0) 674 { 675 n2p = n3p -= 2; 676 steps++; 677 skips--; 678 } 679 else if (skips == 0) /* only gets executed once */ 680 { 681 steps -= rodd; /* rodd is 1 or 0 */ 682 doublesteps -= rodd+1; 683 rp1 += (rodd+1)<<1; 684 n2p = n1p+2; 685 skips--; 686 } 687 else /* skips < 0 */ 688 { 689 steps--; 690 doublesteps -= 2; 691 rp1 += 4; /* add two 2-byte words */ 692 n2p = n1p + 2; 693 } 694 rp2 = rp1; 695 carry_steps = doublesteps; 696 } 697 /* All the middle terms have been multiplied. Now double it. */ 698 bnlength = rlength; 699 double_a_bn(r); 700 bnlength = bnl; 701 } 702 /* Now go back and add in the squared terms. */ 703 704 /* be careful, the next dozen or so lines are confusing! */ 705 /* determine whether r is on an odd or even word in the number */ 706 /* using i as a temporary variable here */ 707 i = (bnlength<<1)-rlength; 708 rp1 = r + ((U16)i & (U16)0x0002); 709 i = (U16)((i>>1)+1) & (U16)0xFFFE; 710 n1p = n + i; 711 /* i here is no longer a temp var., but will be used as a loop counter */ 712 i = (bnlength - i)>>1; 713 carry_steps = doublesteps = (i<<1)-2; 714 /* i is already set */ 715 for (; i>0; i--) 716 { 717 /* square it */ 718 prod = (U32)big_access16(n1p) * (U32)big_access16(n1p); /* U16*U16=U32 */ 719 sum = (U32)big_access16(rp1) + prod; /* add to previous, including overflow */ 720 big_set16(rp1, (U16)sum); /* save the lower 2 bytes */ 721 sum >>= 16; /* keep just the upper 2 bytes */ 722 rp3 = rp1 + 2; /* move over 2 bytes */ 723 sum += big_access16(rp3); /* add what was the upper two bytes */ 724 big_set16(rp3, (U16)sum); /* save what was the upper two bytes */ 725 sum >>= 16; /* keep just the overflow */ 726 for (k=0; sum != 0 && k<carry_steps; k++) 727 { 728 rp3 += 2; /* move over 2 bytes */ 729 sum += big_access16(rp3); /* add to what was the overflow */ 730 big_set16(rp3, (U16)sum); /* save what was the overflow */ 731 sum >>= 16; /* keep just the new overflow */ 732 } 733 n1p += 2; 734 rp1 += 4; 735 carry_steps = doublesteps -= 2; 736 } 737 return r; 738 } 739 740 /********************************************************************/ 741 /* r = n * u where u is an unsigned integer */ 742 bn_t mult_bn_int(bn_t r, bn_t n, U16 u) 743 { 744 int i; 745 U32 prod=0; 746 747 /* two bytes at a time */ 748 for (i=0; i<bnlength; i+=2) 749 { 750 prod += (U32)big_access16(n+i) * u ; /* n*u */ 751 big_set16(r+i, (U16)prod); /* store the lower 2 bytes */ 752 prod >>= 16; /* shift the overflow for next time */ 753 } 754 return r; 755 } 756 757 /********************************************************************/ 758 /* r *= u where u is an unsigned integer */ 759 bn_t mult_a_bn_int(bn_t r, U16 u) 760 { 761 int i; 762 U32 prod=0; 763 764 /* two bytes at a time */ 765 for (i=0; i<bnlength; i+=2) 766 { 767 prod += (U32)big_access16(r+i) * u ; /* r*u */ 768 big_set16(r+i, (U16)prod); /* store the lower 2 bytes */ 769 prod >>= 16; /* shift the overflow for next time */ 770 } 771 return r; 772 } 773 774 /********************************************************************/ 775 /* r = n / u where u is an unsigned integer */ 776 bn_t unsafe_div_bn_int(bn_t r, bn_t n, U16 u) 777 { 778 int i, sign; 779 U32 full_number; 780 U16 quot, rem=0; 781 782 sign = is_bn_neg(n); 783 if (sign) 784 neg_a_bn(n); 785 786 if (u == 0) /* division by zero */ 787 { 788 max_bn(r); 789 if (sign) 790 neg_a_bn(r); 791 return r; 792 } 793 794 /* two bytes at a time */ 795 for (i=bnlength-2; i>=0; i-=2) 796 { 797 full_number = ((U32)rem<<16) + (U32)big_access16(n+i); 798 quot = (U16)(full_number / u); 799 rem = (U16)(full_number % u); 800 big_set16(r+i, quot); 801 } 802 803 if (sign) 804 neg_a_bn(r); 805 return r; 806 } 807 808 /********************************************************************/ 809 /* r /= u where u is an unsigned integer */ 810 bn_t div_a_bn_int(bn_t r, U16 u) 811 { 812 int i, sign; 813 U32 full_number; 814 U16 quot, rem=0; 815 816 sign = is_bn_neg(r); 817 if (sign) 818 neg_a_bn(r); 819 820 if (u == 0) /* division by zero */ 821 { 822 max_bn(r); 823 if (sign) 824 neg_a_bn(r); 825 return r; 826 } 827 828 /* two bytes at a time */ 829 for (i=bnlength-2; i>=0; i-=2) 830 { 831 full_number = ((U32)rem<<16) + (U32)big_access16(r+i); 832 quot = (U16)(full_number / u); 833 rem = (U16)(full_number % u); 834 big_set16(r+i, quot); 835 } 836 837 if (sign) 838 neg_a_bn(r); 839 return r; 840 } 841 842 /*********************************************************************/ 843 /* f = b */ 844 /* Converts a bignumber to a double */ 845 LDBL bntofloat(bn_t n) 846 { 847 int i; 848 int signflag=0; 849 int expon; 850 bn_t getbyte; 851 LDBL f=0; 852 853 if (is_bn_neg(n)) 854 { 855 signflag = 1; 856 neg_a_bn(n); 857 } 858 859 expon = intlength - 1; 860 getbyte = n + bnlength - 1; 861 while (*getbyte == 0 && getbyte >= n) 862 { 863 getbyte--; 864 expon--; 865 } 866 867 /* There is no need to use all bnlength bytes. To get the full */ 868 /* precision of LDBL, all you need is LDBL_MANT_DIG/8+1. */ 869 for (i = 0; i < (LDBL_MANT_DIG/8+1) && getbyte >= n; i++, getbyte--) 870 { 871 f += scale_256(*getbyte,-i); 872 } 873 874 f = scale_256(f,expon); 875 876 if (signflag) 877 { 878 f = -f; 879 neg_a_bn(n); 880 } 881 return f; 882 } 883 884 /*****************************************/ 885 /* the following used to be in bigfltc.c */ 886 887 /********************************************************************/ 888 /* r = 0 */ 889 bf_t clear_bf(bf_t r) 890 { 891 _fmemset( r, 0, bflength+2); /* set array to zero */ 892 return r; 893 } 894 895 /********************************************************************/ 896 /* r = n */ 897 bf_t copy_bf(bf_t r, bf_t n) 898 { 899 _fmemcpy( r, n, bflength+2); 900 return r; 901 } 902 903 /*********************************************************************/ 904 /* b = f */ 905 /* Converts a double to a bigfloat */ 906 bf_t floattobf(bf_t r, LDBL f) 907 { 908 int power; 909 int bnl, il; 910 if (f == 0) 911 { 912 clear_bf(r); 913 return r; 914 } 915 916 /* remove the exp part */ 917 f = extract_256(f, &power); 918 919 bnl = bnlength; 920 bnlength = bflength; 921 il = intlength; 922 intlength = 2; 923 floattobn(r, f); 924 bnlength = bnl; 925 intlength = il; 926 927 big_set16(r + bflength, (S16)power); /* exp */ 928 929 return r; 930 } 931 932 /*********************************************************************/ 933 /* b = f */ 934 /* Converts a double to a bigfloat */ 935 bf_t floattobf1(bf_t r, LDBL f) 936 { 937 char msg[80]; 938 #ifdef USE_LONG_DOUBLE 939 sprintf(msg,"%-.22Le",f);
940 #else 941 sprintf(msg,"%-.22le",f);
942 #endif 943 strtobf(r,msg); 944 return r; 945 } 946 947 /*********************************************************************/ 948 /* f = b */ 949 /* Converts a bigfloat to a double */ 950 LDBL bftofloat(bf_t n) 951 { 952 int power; 953 int bnl, il; 954 LDBL f; 955 956 bnl = bnlength; 957 bnlength = bflength; 958 il = intlength; 959 intlength = 2; 960 f = bntofloat(n); 961 bnlength = bnl; 962 intlength = il; 963 964 power = (S16)big_access16(n + bflength); 965 f = scale_256(f,power); 966 967 return f; 968 } 969 970 /********************************************************************/ 971 /* extracts the mantissa and exponent of f */ 972 /* finds m and n such that 1<=|m|<256 and f = m*256^n */ 973 /* n is stored in *exp_ptr and m is returned, sort of like frexp() */ 974 LDBL extract_256(LDBL f, int *exp_ptr) 975 { 976 return extract_value(f, 256, exp_ptr); 977 } 978 979 /********************************************************************/ 980 /* calculates and returns the value of f*256^n */ 981 /* sort of like ldexp() */ 982 /* */ 983 /* n must be in the range -2^12 <= n < 2^12 (2^12=4096), */ 984 /* which should not be a problem */ 985 LDBL scale_256( LDBL f, int n ) 986 { 987 return scale_value( f, 256, n ); 988 } 989