; File: dos\bignuma.asm
; bignuma.asm
;
; based on:
; bbignuma.asm - asm routines for bignumbers
; Wesley Loewer's Big Numbers. (C) 1994-95, Wesley B. Loewer
; based pointer version
; See BIGLIB.TXT for further documentation.
; general programming notes for based pointer version
; ALL big_t pointers must have a segment value equal to bignum_seg.
; single arg procedures, p(r), r = bx (or si when required)
; two arg procedures, p(r,n), r=di, n=bx(or si when required)
; two arg procedures, p(n1,n2), n1=bx(or si when required), n2=di
; three arg proc, p(r,n1,n2), r=di, n1=si, n2=bx
; unless otherwise noted, such as full_mult, mult, full_square, square
.MODEL medium, c
include big.inc
include bigport.inc
.DATA
.CODE
.8086
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = 0
clear_bn PROC USES di, r:bn_t
; Zero-fill the bnlength-byte big number at bignum_seg:r.
;   In:  r  = near offset of the destination
;   Out: ax = r
;   Uses: cx, es (di is saved by USES)
        mov     es, bignum_seg          ; es:di -> destination for stos
        mov     di, word ptr r
        mov     cx, bnlength            ; cx = length in bytes

IFDEF BIG16AND32
        cmp     cpu, 386                ; run-time choice of store width
        jae     short use_32_bit        ; 386 or better: dword stores
ENDIF

IFDEF BIG16
        xor     ax, ax                  ; fill pattern = 0
        shr     cx, 1                   ; bytes -> words
        rep     stosw                   ; store cx zero words
ENDIF

IFDEF BIG16AND32
        jmp     bottom                  ; skip the 32-bit variant
ENDIF

IFDEF BIG32
use_32_bit:
.386
        xor     eax, eax                ; fill pattern = 0
        shr     cx, 2                   ; bytes -> dwords
        rep     stosd                   ; store cx zero dwords
ENDIF

bottom:
.8086
        mov     ax, word ptr r          ; hand r back to the caller
        ret
clear_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = max positive value
max_bn PROC USES di, r:bn_t
; Set the big number at bignum_seg:r to the largest positive value:
; every byte 0FFh except the most significant one, which becomes 7Fh.
;   In:  r  = near offset of the destination
;   Out: ax = r
;   Uses: cx, es (di is saved by USES)
        mov     es, bignum_seg          ; es:di -> destination for stos
        mov     di, word ptr r
        mov     cx, bnlength            ; cx = length in bytes

IFDEF BIG16AND32
        cmp     cpu, 386                ; run-time choice of store width
        jae     short use_32_bit        ; 386 or better: dword stores
ENDIF

IFDEF BIG16
        mov     ax, -1                  ; fill pattern = all ones
        shr     cx, 1                   ; bytes -> words
        rep     stosw                   ; fill r a word at a time
ENDIF

IFDEF BIG16AND32
        jmp     bottom                  ; skip the 32-bit variant
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     eax, -1                 ; fill pattern = all ones
        shr     cx, 2                   ; bytes -> dwords
        rep     stosd                   ; fill r a dword at a time
ENDIF

bottom:
.8086
        ; the string store leaves di one byte past the end of r,
        ; so di-1 addresses the most significant byte
        mov     byte ptr es:[di-1], 7Fh ; clear the sign bit
        mov     ax, word ptr r          ; hand r back to the caller
        ret
max_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n
copy_bn PROC USES di si, r:bn_t, n:bn_t
; Copy the bnlength-byte big number at bignum_seg:n to bignum_seg:r.
;   In:  r  = near offset of the destination
;        n  = near offset of the source
;   Out: ax = r
;   Uses: cx, es (di, si are saved by USES; ds is preserved)
        push    ds                      ; ds is repointed for movs below
        mov     es, bignum_seg          ; es:di -> destination
        mov     si, word ptr n          ; si = source offset
        mov     di, word ptr r
        mov     cx, bnlength            ; cx = length in bytes

IFDEF BIG16AND32
        cmp     cpu, 386                ; run-time choice of copy width
        jae     short use_32_bit        ; 386 or better: dword copies
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; ds:si -> source
        shr     cx, 1                   ; bytes -> words
        rep     movsw                   ; copy a word at a time
ENDIF

IFDEF BIG16AND32
        jmp     bottom                  ; skip the 32-bit variant
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; ds:si -> source
        shr     cx, 2                   ; bytes -> dwords
        rep     movsd                   ; copy a dword at a time
ENDIF

bottom:
.8086
        pop     ds                      ; back to the caller's data segment
        mov     ax, word ptr r          ; hand r back to the caller
        ret
copy_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; n1 != n2 ?
; RETURNS: if n1 == n2 returns 0
; if n1 > n2 returns a positive (steps left to go when mismatch occurred)
; if n1 < n2 returns a negative (steps left to go when mismatch occurred)
cmp_bn PROC USES di, n1:bn_t, n2:bn_t
; Compare two bnlength-byte big numbers in bignum_seg, scanning from the
; most significant end toward the least significant end.
;   In:  n1, n2 = near offsets of the operands
;   Out: ax = 0 if every element matched; otherwise the number of bytes
;             still to be examined when the first differing byte was found,
;             positive if n1 > n2 and negated if n1 < n2
;   Uses: bx, cx, dx (di is saved by USES; ds is preserved)
; The most significant byte is compared as signed, every other byte as
; unsigned.
        push    ds                      ; save DS
        mov     cx, bnlength
        mov     dx, cx                  ; keep bnlength for the test at bottom
        mov     di, word ptr n2         ; load n2 pointer in di
        mov     bx, word ptr n1         ; load n1 pointer in bx
        add     bx, cx                  ; point just past the end of both
        add     di, cx                  ; numbers, where the msb is

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds
        shr     cx, 1                   ; bytes -> words
top_loop_16:
        sub     bx, 2                   ; step back to the previous word
        sub     di, 2
        mov     ax, ds:[bx]             ; load n1
        cmp     ax, ds:[di]             ; compare to n2
        jne     not_match_16            ; words differ
        loop    top_loop_16
        jmp     match                   ; cx is zero
not_match_16:
        ; work out which byte of the word differed
        shl     cx, 1                   ; words -> bytes
        cmp     ah, ds:[di+1]           ; high byte first
        jne     bottom                  ; high byte differs
        ; high bytes agree, so the mismatch is in the low byte
        dec     cx                      ; one byte further along
        cmp     al, ds:[di]             ; set the flags used at bottom
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds
        shr     cx, 2                   ; bytes -> dwords
top_loop_32:
        sub     bx, 4                   ; step back to the previous dword
        sub     di, 4
        mov     eax, ds:[bx]            ; load n1
        cmp     eax, ds:[di]            ; compare to n2
        jne     not_match_32            ; dwords differ
        loop    top_loop_32
        jmp     match                   ; cx is zero
not_match_32:
        ; work out which byte of the dword differed
        shl     cx, 2                   ; dwords -> bytes
        mov     ebx, eax
        shr     ebx, 16                 ; upper half of n1's dword into bx
        cmp     bh, ds:[di+3]           ; compare to n2
        jne     bottom                  ; byte 3 differs
        dec     cx                      ; one byte further along
        cmp     bl, ds:[di+2]           ; compare to n2
        jne     bottom                  ; byte 2 differs
        dec     cx                      ; one byte further along
        cmp     ah, ds:[di+1]           ; compare to n2
        jne     bottom                  ; byte 1 differs
        ; bytes 3, 2 and 1 agree, so the mismatch is in byte 0
        dec     cx                      ; one byte further along
        cmp     al, ds:[di]             ; set the flags used at bottom
        jmp     bottom
ENDIF

bottom:
.8086
        ; The flags still describe the byte compare that found the mismatch.
        ; if cx == dx the most significant byte differed: signed comparison
        ; otherwise a lower byte differed: unsigned comparison
        ;
        ; pushf/popf is used rather than lahf/sahf because the signed test
        ; (jg) reads OF, and lahf/sahf do not transfer OF.
        pushf                           ; keep the byte-compare flags
        cmp     cx, dx                  ; did they differ on the very first byte?
        jne     not_first_step          ; no

        popf                            ; yes
        jg      n1_bigger               ; signed comparison
        jmp     n2_bigger

not_first_step:
        popf
        ja      n1_bigger               ; unsigned comparison

n2_bigger:
        neg     cx                      ; make it negative
n1_bigger:                              ; leave it positive
match:                                  ; leave it zero
        mov     ax, cx
        pop     ds                      ; restore DS
        ret
cmp_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r < 0 ?
; returns 1 if negative, 0 if positive or zero
is_bn_neg PROC n:bn_t
; Report the sign of the big number at bignum_seg:n.
;   In:  n  = near offset of the number
;   Out: ax = 1 if the top bit of the most significant byte is set, else 0
;   Uses: bx, es
; Only one byte is read, so es is used and ds is left alone.
        mov     bx, word ptr n
        mov     es, bignum_seg          ; load n pointer in es:bx

        add     bx, bnlength            ; bx -> one byte past the end of n
        mov     al, es:[bx-1]           ; most significant byte holds the sign

        and     al, 80h                 ; isolate the sign bit
        rol     al, 1                   ; rotate sign bit to bit 0
        sub     ah, ah                  ; clear upper ax
        ret

is_bn_neg ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; n != 0 ?
; RETURNS: if n != 0 returns a nonzero value (the loop count remaining
;          when the first nonzero word/dword was found)
;          else returns 0
is_bn_not_zero PROC n:bn_t
; Test whether the big number at bignum_seg:n has any nonzero element.
;   In:  n  = near offset of the number
;   Out: ax = 0 if every word/dword is zero; otherwise the loop count left
;             in cx when the first nonzero element was found (never 0)
;   Uses: bx, cx (ds is preserved)

        mov     ax, ds                  ; save DS
        mov     cx, bnlength
        mov     bx, word ptr n

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load n pointer in ds:bx
        shr     cx, 1                   ; bytes -> words
top_loop_16:
        cmp     word ptr ds:[bx], 0     ; compare n to 0
        jnz     bottom                  ; not zero, cx is still nonzero
        add     bx, 2                   ; increment to next word
        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load n pointer in ds:bx
        shr     cx, 2                   ; bytes -> dwords
top_loop_32:
        cmp     dword ptr ds:[bx], 0    ; compare n to 0
        jnz     bottom                  ; not zero, cx is still nonzero
        add     bx, 4                   ; increment to next dword
        loop    top_loop_32
        jmp     bottom
ENDIF

bottom:
.8086
        mov     ds, ax                  ; restore DS
        ; if cx is zero, then n was zero
        mov     ax, cx
        ret

is_bn_not_zero ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 + n2
add_bn PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
; r = n1 + n2 over bnlength bytes, least significant element first, with
; the carry chained from element to element through adc.
;   In:  r, n1, n2 = near offsets within bignum_seg
;   Out: ax = r
;   Uses: bx, cx, dx (di, si are saved by USES; ds is preserved via dx)

        mov     dx, ds                  ; save ds
        mov     cx, bnlength
        mov     di, WORD PTR r
        mov     si, WORD PTR n1
        mov     bx, WORD PTR n2

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words
        clc                             ; no carry into the first word

top_loop_16:
        mov     ax, ds:[si]             ; n1
        adc     ax, ds:[bx]             ; n1+n2
        mov     ds:[di], ax             ; r = n1+n2

        ; inc does not change carry flag
        inc     di                      ; add di, 2
        inc     di
        inc     si                      ; add si, 2
        inc     si
        inc     bx                      ; add bx, 2
        inc     bx

        loop    top_loop_16

ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords
        clc                             ; no carry into the first dword

top_loop_32:
        mov     eax, ds:[si]            ; n1
        adc     eax, ds:[bx]            ; n1+n2
        mov     ds:[di], eax            ; r = n1+n2

        lahf                            ; save carry flag across the adds
        add     di, 4                   ; increment by double word size
        add     si, 4
        add     bx, 4
        sahf                            ; restore carry flag

        loop    top_loop_32
ENDIF

bottom:
.8086
        mov     ds, dx                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
add_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r += n
add_a_bn PROC USES di, r:bn_t, n:bn_t
; r += n over bnlength bytes, least significant element first, with the
; carry chained from element to element through adc.
;   In:  r, n = near offsets within bignum_seg
;   Out: ax = r
;   Uses: bx, cx, dx (di is saved by USES; ds is preserved via dx)

        mov     dx, ds                  ; save ds
        mov     cx, bnlength
        mov     di, WORD PTR r
        mov     bx, WORD PTR n

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words
        clc                             ; no carry into the first word

top_loop_16:
        mov     ax, ds:[bx]             ; n
        adc     ds:[di], ax             ; r += n

        ; inc does not change carry flag
        inc     di                      ; add di, 2
        inc     di
        inc     bx                      ; add bx, 2
        inc     bx

        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords
        clc                             ; no carry into the first dword

top_loop_32:
        mov     eax, ds:[bx]            ; n
        adc     ds:[di], eax            ; r += n

        lahf                            ; save carry flag across the adds
        add     di, 4                   ; increment by double word size
        add     bx, 4
        sahf                            ; restore carry flag

        loop    top_loop_32
ENDIF

bottom:
.8086
        mov     ds, dx                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
add_a_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 - n2
sub_bn PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
; r = n1 - n2 over bnlength bytes, least significant element first, with
; the borrow chained from element to element through sbb.
;   In:  r, n1, n2 = near offsets within bignum_seg
;   Out: ax = r
;   Uses: bx, cx, dx (di, si are saved by USES; ds is preserved via dx)

        mov     dx, ds                  ; save ds
        mov     cx, bnlength
        mov     di, WORD PTR r
        mov     si, WORD PTR n1
        mov     bx, WORD PTR n2

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words
        clc                             ; no borrow into the first word

top_loop_16:
        mov     ax, ds:[si]             ; n1
        sbb     ax, ds:[bx]             ; n1-n2
        mov     ds:[di], ax             ; r = n1-n2

        ; inc does not change carry flag
        inc     di                      ; add di, 2
        inc     di
        inc     si                      ; add si, 2
        inc     si
        inc     bx                      ; add bx, 2
        inc     bx

        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords
        clc                             ; no borrow into the first dword

top_loop_32:
        mov     eax, ds:[si]            ; n1
        sbb     eax, ds:[bx]            ; n1-n2
        mov     ds:[di], eax            ; r = n1-n2

        lahf                            ; save carry flag across the adds
        add     di, 4                   ; increment by double word size
        add     si, 4
        add     bx, 4
        sahf                            ; restore carry flag

        loop    top_loop_32
ENDIF

bottom:
.8086

        mov     ds, dx                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
sub_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r -= n
sub_a_bn PROC USES di, r:bn_t, n:bn_t
; r -= n over bnlength bytes, least significant element first, with the
; borrow chained from element to element through sbb.
;   In:  r, n = near offsets within bignum_seg
;   Out: ax = r
;   Uses: bx, cx, dx (di is saved by USES; ds is preserved via dx)

        mov     dx, ds                  ; save ds
        mov     cx, bnlength
        mov     di, WORD PTR r
        mov     bx, WORD PTR n

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words
        clc                             ; no borrow into the first word

top_loop_16:
        mov     ax, ds:[bx]             ; n
        sbb     ds:[di], ax             ; r -= n

        ; inc does not change carry flag
        inc     di                      ; add di, 2
        inc     di
        inc     bx                      ; add bx, 2
        inc     bx

        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords
        clc                             ; no borrow into the first dword

top_loop_32:
        mov     eax, ds:[bx]            ; n
        sbb     ds:[di], eax            ; r -= n

        lahf                            ; save carry flag across the adds
        add     di, 4                   ; increment by double word size
        add     bx, 4
        sahf                            ; restore carry flag

        loop    top_loop_32

ENDIF

bottom:
.8086

        mov     ds, dx                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
sub_a_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = -n
neg_bn PROC USES di, r:bn_t, n:bn_t
; r = -n (two's complement) over bnlength bytes.
; Elements are negated from the least significant end until the first
; nonzero one (neg sets CF when its operand was nonzero); every element
; above that one is simply complemented with not.
;   In:  r, n = near offsets within bignum_seg
;   Out: ax = r
;   Uses: bx, cx, dx (di is saved by USES; ds is preserved via dx)

        mov     dx, ds                  ; save ds
        mov     cx, bnlength
        mov     di, WORD PTR r
        mov     bx, WORD PTR n

IFDEF BIG16AND32
        cmp     cpu, 386
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words

top_loop_16:
        mov     ax, ds:[bx]
        neg     ax
        mov     ds:[di], ax
        jc      short no_more_carry_16  ; notice the "reverse" logic here

        add     di, 2                   ; increment by word size
        add     bx, 2

        loop    top_loop_16
        jmp     short bottom

no_more_carry_16:
        add     di, 2
        add     bx, 2
        loop    top_loop_no_more_carry_16   ; jump down
        jmp     short bottom            ; that was the last word

top_loop_no_more_carry_16:
        mov     ax, ds:[bx]
        not     ax
        mov     ds:[di], ax

        add     di, 2
        add     bx, 2

        loop    top_loop_no_more_carry_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords

top_loop_32:
        mov     eax, ds:[bx]
        neg     eax
        mov     ds:[di], eax
        jc      short no_more_carry_32  ; notice the "reverse" logic here

        add     di, 4                   ; increment by double word size
        add     bx, 4

        loop    top_loop_32
        jmp     short bottom

no_more_carry_32:
        add     di, 4                   ; increment by double word size
        add     bx, 4
        loop    top_loop_no_more_carry_32   ; jump down
        jmp     short bottom            ; that was the last dword

top_loop_no_more_carry_32:
        mov     eax, ds:[bx]
        not     eax
        mov     ds:[di], eax

        add     di, 4                   ; increment by double word size
        add     bx, 4

        loop    top_loop_no_more_carry_32
ENDIF

bottom:
.8086

        mov     ds, dx                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
neg_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r *= -1
neg_a_bn PROC r:bn_t
; r = -r (two's complement, in place) over bnlength bytes.
; Elements are negated from the least significant end until the first
; nonzero one (neg sets CF when its operand was nonzero); every element
; above that one is simply complemented with not.
;   In:  r  = near offset within bignum_seg
;   Out: ax = r
;   Uses: bx, cx (ds is preserved via ax; es is not touched)

        mov     ax, ds                  ; save ds
        mov     cx, bnlength
        mov     bx, WORD PTR r

IFDEF BIG16AND32
        cmp     cpu, 386
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds
        shr     cx, 1                   ; bytes -> words

top_loop_16:
        neg     word ptr ds:[bx]
        jc      short no_more_carry_16  ; notice the "reverse" logic here

        add     bx, 2

        loop    top_loop_16
        jmp     short bottom

no_more_carry_16:
        add     bx, 2
        loop    top_loop_no_more_carry_16   ; jump down
        jmp     short bottom            ; that was the last word

top_loop_no_more_carry_16:
        not     word ptr ds:[bx]

        add     bx, 2

        loop    top_loop_no_more_carry_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds
        shr     cx, 2                   ; bytes -> dwords

top_loop_32:
        neg     dword ptr ds:[bx]
        jc      short no_more_carry_32  ; notice the "reverse" logic here

        add     bx, 4

        loop    top_loop_32
        jmp     short bottom

no_more_carry_32:
        add     bx, 4
        loop    top_loop_no_more_carry_32   ; jump down
        jmp     short bottom            ; that was the last dword

top_loop_no_more_carry_32:
        not     dword ptr ds:[bx]

        add     bx, 4

        loop    top_loop_no_more_carry_32
ENDIF

bottom:
.8086
        mov     ds, ax                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
neg_a_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = 2*n
double_bn PROC USES di, r:bn_t, n:bn_t
; r = 2*n: shift n left by one bit over bnlength bytes, least significant
; element first, passing the shifted-out bit upward through CF with rcl.
;   In:  r, n = near offsets within bignum_seg
;   Out: ax = r
;   Uses: bx, cx, dx (di is saved by USES; ds is preserved via dx)

        mov     dx, ds                  ; save ds
        mov     cx, bnlength
        mov     di, WORD PTR r
        mov     bx, WORD PTR n

IFDEF BIG16AND32
        cmp     cpu, 386
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words
        clc                             ; shift a 0 into the lowest bit

top_loop_16:
        mov     ax, ds:[bx]
        rcl     ax, 1                   ; rotate with carry left
        mov     ds:[di], ax

        ; inc does not change carry flag
        inc     di                      ; add di, 2
        inc     di
        inc     bx                      ; add bx, 2
        inc     bx

        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords
        clc                             ; shift a 0 into the lowest bit

top_loop_32:
        mov     eax, ds:[bx]
        rcl     eax, 1                  ; rotate with carry left
        mov     ds:[di], eax

        lahf                            ; save carry flag across the adds
        add     di, 4                   ; increment by double word size
        add     bx, 4
        sahf                            ; restore carry flag

        loop    top_loop_32

ENDIF
bottom:
.8086

        mov     ds, dx                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
double_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r *= 2
double_a_bn PROC r:bn_t
; r *= 2 (in place): shift r left by one bit over bnlength bytes, least
; significant element first, passing the shifted-out bit upward with rcl.
;   In:  r  = near offset within bignum_seg
;   Out: ax = r
;   Uses: bx, cx (ds is preserved via ax)

        mov     ax, ds                  ; save ds
        mov     cx, bnlength
        mov     bx, WORD PTR r

IFDEF BIG16AND32
        cmp     cpu, 386
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words
        clc                             ; shift a 0 into the lowest bit

top_loop_16:
        rcl     word ptr ds:[bx], 1     ; rotate with carry left

        ; inc does not change carry flag
        inc     bx                      ; add bx, 2
        inc     bx

        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords
        clc                             ; shift a 0 into the lowest bit

top_loop_32:
        rcl     dword ptr ds:[bx], 1    ; rotate with carry left

        inc     bx                      ; add bx, 4 but keep carry flag
        inc     bx
        inc     bx
        inc     bx

        loop    top_loop_32
ENDIF

bottom:
.8086

        mov     ds, ax                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
double_a_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n/2
half_bn PROC USES di, r:bn_t, n:bn_t
; r = n/2: shift n right by one bit over bnlength bytes, most significant
; element first. The top element uses sar so the sign bit is kept; every
; lower element uses rcr to take in the bit shifted out above it.
;   In:  r, n = near offsets within bignum_seg
;   Out: ax = r
;   Uses: bx, cx, dx (di is saved by USES; ds is preserved via dx)

        mov     dx, ds                  ; save ds
        mov     cx, bnlength
        mov     di, WORD PTR r
        mov     bx, WORD PTR n

        add     di, cx                  ; start with msb
        add     bx, cx

IFDEF BIG16AND32
        cmp     cpu, 386
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words

        ; handle the first step with sar, the rest with rcr
        sub     di, 2
        sub     bx, 2

        mov     ax, ds:[bx]
        sar     ax, 1                   ; shift arithmetic right
        mov     ds:[di], ax

        loop    top_loop_16             ; loop leaves the flags alone
        jmp     short bottom            ; only one word in the number


top_loop_16:
        ; dec does not change carry flag
        dec     di                      ; sub di, 2
        dec     di
        dec     bx                      ; sub bx, 2
        dec     bx

        mov     ax, ds:[bx]
        rcr     ax, 1                   ; rotate with carry right
        mov     ds:[di], ax

        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords

        sub     di, 4                   ; decrement by double word size
        sub     bx, 4

        mov     eax, ds:[bx]
        sar     eax, 1                  ; shift arithmetic right
        mov     ds:[di], eax

        loop    top_loop_32             ; loop leaves the flags alone
        jmp     short bottom            ; only one dword in the number

top_loop_32:
        lahf                            ; save carry flag across the subs
        sub     di, 4                   ; decrement by double word size
        sub     bx, 4
        sahf                            ; restore carry flag

        mov     eax, ds:[bx]
        rcr     eax, 1                  ; rotate with carry right
        mov     ds:[di], eax

        loop    top_loop_32
ENDIF

bottom:
.8086

        mov     ds, dx                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
half_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r /= 2
half_a_bn PROC r:bn_t
; r /= 2 (in place): shift r right by one bit over bnlength bytes, most
; significant element first. The top element uses sar so the sign bit is
; kept; every lower element uses rcr to take in the bit shifted out above.
;   In:  r  = near offset within bignum_seg
;   Out: ax = r
;   Uses: bx, cx (ds is preserved via ax)

        mov     ax, ds                  ; save ds
        mov     cx, bnlength
        mov     bx, WORD PTR r

        add     bx, cx                  ; start with msb


IFDEF BIG16AND32
        cmp     cpu, 386
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        shr     cx, 1                   ; bytes -> words

        ; handle the first step with sar, the rest with rcr
        sub     bx, 2

        sar     word ptr ds:[bx], 1     ; shift arithmetic right

        loop    top_loop_16             ; loop leaves the flags alone
        jmp     short bottom            ; only one word in the number


top_loop_16:
        ; dec does not change carry flag
        dec     bx                      ; sub bx, 2
        dec     bx

        rcr     word ptr ds:[bx], 1     ; rotate with carry right

        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     short bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        shr     cx, 2                   ; bytes -> dwords
        sub     bx, 4                   ; decrement by double word size
        sar     dword ptr ds:[bx], 1    ; shift arithmetic right

        loop    top_loop_32             ; loop leaves the flags alone
        jmp     short bottom            ; only one dword in the number

top_loop_32:
        dec     bx                      ; sub bx, 4 but keep carry flag
        dec     bx
        dec     bx
        dec     bx

        rcr     dword ptr ds:[bx], 1    ; rotate with carry right

        loop    top_loop_32
ENDIF

bottom:
.8086

        mov     ds, ax                  ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
half_a_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 * n2
; Note: r will be a double wide result, 2*bnlength
; n1 and n2 can be the same pointer
; SIDE-EFFECTS: n1 and n2 are changed to their absolute values
;
unsafe_full_mult_bn PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
LOCAL sign1:byte, sign2:byte, samevar:byte, \
      i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \
      n1p: near ptr byte, n2p: near ptr byte
; r = n1 * n2, where r is a double wide (2*bnlength byte) result.
;   In:  r, n1, n2 = near offsets within bignum_seg; n1 and n2 may be the
;        same offset
;   Out: ax = r
;   Side effects: n1 and n2 are replaced by their absolute values (through
;        neg_a_bn); bnlength is doubled around the final negate of r and
;        then halved again.
;   Uses: bx, cx, dx, es (di, si are saved by USES; ds is preserved)
; The operands are forced positive first and the sign of the product is
; applied at the end. The product is built by schoolbook multiplication:
; each partial product is added into r and any carry is rippled upward.

        push    ds                      ; save ds
        mov     es, bignum_seg          ; load es for when ds is a pain

; Test to see if n1 and n2 are the same variable.  It would be better to
; use square_bn(), but it could happen.

        mov     samevar, 0              ; assume they are not the same
        mov     bx, word ptr n1
        cmp     bx, word ptr n2         ; compare offset
        jne     end_samevar_check       ; not the same
        mov     samevar, 1              ; they are the same
end_samevar_check:

; By forcing the bignumber to be positive and keeping track of the sign
; bits separately, quite a few multiplies are saved.

        ; check for sign bits
        add     bx, bnlength
        mov     al, es:[bx-1]
        and     al, 80h                 ; check the sign bit
        mov     sign1, al               ; mov leaves the flags from and intact
        jz      already_pos1
        invoke  neg_a_bn, n1
already_pos1:

        cmp     samevar, 1              ; if it's the same variable
        je      already_pos2            ; then skip this second check
        mov     bx, word ptr n2
        add     bx, bnlength
        mov     al, es:[bx-1]
        and     al, 80h                 ; check the sign bit
        mov     sign2, al               ; mov leaves the flags from and intact
        jz      already_pos2
        invoke  neg_a_bn, n2
already_pos2:

; in the following loops, the following pointers are used
;   n1p, n2p = points to the part of n1, n2 being used
;   di = points to part of doublebignumber r used in outer loop
;   si = points to part of doublebignumber r used in inner loop
;   bx = points to part of doublebignumber r for carry flag loop
; Also, since r is used more than n1p or n2p, abandon the convention of
; using ES for r.  Using DS will save a few clock cycles.

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
;       jae     use_32_bit              ; use faster 32 bit code if possible
        jb      wont_use_32bit
        jmp     use_32_bit              ; use faster 32 bit code if possible
wont_use_32bit:
ENDIF

IFDEF BIG16
        ; set variables
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 1                   ; bytes -> words
        mov     steps, dx               ; save in steps
        mov     i, dx
        shl     dx, 1                   ; double steps

        ; clear r
        sub     ax, ax                  ; clear ax
        mov     cx, dx                  ; size of doublebignumber (r) in words
        mov     di, word ptr r          ; load r in es:di for stos
        rep     stosw                   ; initialize r to 0

        sub     dx, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx

        ; prepare segments and offsets for loops
        mov     di, word ptr r
        mov     si, di                  ; both si and di are used here
        mov     ds, bignum_seg          ; load ds
        mov     ax, word ptr n1         ; load pointers
        mov     n1p, ax
        ; use ds for all pointers

top_outer_loop_16:
        mov     ax, word ptr n2         ; set n2p pointer
        mov     n2p, ax
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_16:
        mov     bx, n1p
        mov     ax, ds:[bx]
        mov     bx, n2p
        mul     word ptr ds:[bx]        ; dx:ax = partial product

        mov     bx, si
        add     bx, 2                   ; increase by size of word
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_16        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of double big number
top_carry_loop_16:
        add     word ptr ds:[bx], 1     ; use add, not inc (inc leaves CF alone)
        jnc     no_more_carry_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_16

no_more_carry_16:
        add     n2p, 2                  ; increase by word size
        add     si, 2
        dec     carry_steps             ; use one less step
        dec     j
        jnz     top_inner_loop_16       ; dec sets ZF but not CF, so test ZF only

        add     n1p, 2                  ; increase by word size
        add     di, 2
        mov     si, di                  ; start with si=di
        dec     doublesteps             ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax
        dec     i
        jnz     top_outer_loop_16       ; dec sets ZF but not CF, so test ZF only

        ; result is now r, a double wide bignumber
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        ; set variables
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 2                   ; bytes -> dwords
        mov     steps, dx               ; save in steps
        mov     i, dx
        shl     dx, 1                   ; double steps

        ; clear r
        sub     eax, eax                ; clear eax
        mov     cx, dx                  ; size of doublebignumber in dwords
        mov     di, word ptr r          ; load r in es:di for stos
        rep     stosd                   ; initialize r to 0

        sub     dx, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx

        ; prepare segments and offsets for loops
        mov     di, word ptr r
        mov     si, di                  ; both si and di are used here
        mov     ds, bignum_seg          ; load ds
        mov     ax, word ptr n1         ; load pointers
        mov     n1p, ax

top_outer_loop_32:
        mov     ax, word ptr n2         ; set n2p pointer
        mov     n2p, ax
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_32:
        mov     bx, n1p
        mov     eax, ds:[bx]
        mov     bx, n2p
        mul     dword ptr ds:[bx]       ; edx:eax = partial product

        mov     bx, si
        add     bx, 4                   ; increase by size of dword
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_32        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of double big number
top_carry_loop_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc (inc leaves CF alone)
        jnc     no_more_carry_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_32

no_more_carry_32:
        add     n2p, 4                  ; increase by dword size
        add     si, 4
        dec     carry_steps             ; use one less step
        dec     j
        jnz     top_inner_loop_32       ; dec sets ZF but not CF, so test ZF only

        add     n1p, 4                  ; increase by dword size
        add     di, 4
        mov     si, di                  ; start with si=di
        dec     doublesteps             ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax
        dec     i
        jnz     top_outer_loop_32       ; dec sets ZF but not CF, so test ZF only

        ; result is now r, a double wide bignumber
ENDIF

bottom:
.8086
        pop     ds                      ; restore ds
        cmp     samevar, 1              ; were the variable the same ones?
        je      pos_answer              ; if yes, then jump (sign2 was never set)

        mov     al, sign1               ; is result + or - ?
        cmp     al, sign2               ; sign(n1) == sign(n2) ?
        je      pos_answer              ; yes
        shl     bnlength, 1             ; temporarily double bnlength
                                        ; for double wide bignumber
        invoke  neg_a_bn, r             ; does not affect ES
        shr     bnlength, 1             ; restore bnlength
pos_answer:

        mov     ax, word ptr r          ; return r in ax
        ret
unsafe_full_mult_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 * n2 calculating only the top rlength bytes
; Note: r will be of length rlength
; bnlength < rlength <= 2*bnlength
; n1 and n2 can be the same pointer
; SIDE-EFFECTS: n1 and n2 are changed to their absolute values
;
unsafe_mult_bn PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
LOCAL sign1:byte, sign2:byte, samevar:byte, \
i:word, j:word, steps:word, doublesteps:word, \
carry_steps:word, skips:word, \
n1p: ptr byte, n2p: ptr byte
push ds ; save ds
mov es, bignum_seg ; load es for when ds is a pain
; Test to see if n1 and n2 are the same variable. It would be better to
; use square_bn(), but it could happen.
mov samevar, 0 ; assume they are not the same
mov bx, word ptr n1
cmp bx, word ptr n2 ; compare offset
jne end_samevar_check ; not the same
mov samevar, 1 ; they are the same
end_samevar_check:
; By forcing the bignumber to be positive and keeping track of the sign
; bits separately, quite a few multiplies are saved.
; check for sign bits
add bx, bnlength
mov al, es:[bx-1]
and al, 80h ; check the sign bit
mov sign1, al
jz already_pos1
invoke neg_a_bn, n1
already_pos1:
cmp samevar, 1 ; if it's the same variable
867 je already_pos2 ; then skip this second check
868 mov bx, word ptr n2
869 add bx, bnlength
870 mov al, es:[bx-1]
871 and al, 80h ; check the sign bit
872 mov sign2, al
873 jz already_pos2
874 invoke neg_a_bn, n2
875 already_pos2:
876
877 ; adjust n2 pointer for partial precision
878 mov ax, bnlength
879 shl ax, 1 ; 2*bnlength
880 sub ax, rlength ; 2*bnlength-rlength
881 add word ptr n2, ax ; n2 = n2+2*bnlength-rlength
882
883
884 ; in the following loops, the following pointers are used
885 ; n1p, n2p = points to the part of n1, n2 being used
886 ; di = points to part of doublebignumber used in outer loop
887 ; si = points to part of doublebignumber used in inner loop
888 ; bx = points to part of doublebignumber for carry flag loop
889 ; Also, since r is used more than n1p or n2p, abandon the convention of
890 ; using ES for r. Using DS will save a few clock cycles.
891
892 IFDEF BIG16AND32
893 cmp cpu, 386 ; check cpu
894 ; jae use_32_bit ; use faster 32 bit code if possible
895 jb cant_use_32bit
896 jmp use_32_bit ; use faster 32 bit code if possible
897 cant_use_32bit:
898 ENDIF
899
900 IFDEF BIG16
901 ; clear r
902 sub ax, ax ; clear ax
903 mov cx, rlength ; size of r in bytes
904 shr cx, 1 ; byte = 1/2 word
905 mov di, word ptr r ; load r in es:di for stos
906 rep stosw ; initialize r to 0
907
908 ; set variables
909 mov ax, rlength ; set steps for first loop
910 sub ax, bnlength
911 shr ax, 1 ; byte = 1/2 word
912 mov steps, ax ; save in steps
913
914 mov ax, bnlength
915 shr ax, 1 ; byte = 1/2 word
916 mov i, ax
917
918 sub ax, steps
919 mov skips, ax ; how long to skip over pointer shifts
920
921 mov ax, rlength ; set steps for first loop
922 shr ax, 1 ; byte = 1/2 word
923 sub ax, 2 ; only rlength/2-2 steps are really needed
924 mov doublesteps, ax
925 mov carry_steps, ax
926
927 ; prepare segments and offsets for loops
928 mov di, word ptr r
929 mov si, di ; both si and di are used here
930 mov ds, bignum_seg ; load ds
931 mov ax, word ptr n1 ; load pointers
932 mov n1p, ax
933 ; use ds for all pointers
934
935
936 top_outer_loop_16:
937 mov ax, word ptr n2 ; set n2p pointer
938 mov n2p, ax
939 mov ax, steps ; set inner loop counter
940 mov j, ax
941
942 top_inner_loop_16:
943 mov bx, n1p
944 mov ax, ds:[bx]
945 mov bx, n2p
946 mul word ptr ds:[bx]
947
948 mov bx, si
949 add bx, 2 ; increase by size of word
950 add ds:[bx-2], ax ; add low word
951 adc ds:[bx], dx ; add high word
952 jnc no_more_carry_16 ; carry loop not necessary
953
954 mov cx, carry_steps ; how many till end of double big number
955 jcxz no_more_carry_16
956 add bx, 2 ; move pointer to next word
957
958 ; loop until no more carry or until end of double big number
959 top_carry_loop_16:
960 add word ptr ds:[bx], 1 ; use add, not inc
961 jnc no_more_carry_16
962 add bx, 2 ; increase by size of word
963 loop top_carry_loop_16
964
965 no_more_carry_16:
966 add n2p, 2 ; increase by word size
967 add si, 2
968 dec carry_steps ; use one less step
969 dec j
970 ja top_inner_loop_16
971
972 add n1p, 2 ; increase by word size
973
974 cmp skips, 0
975 je type2_shifts_16
976 sub word ptr n2, 2 ; shift n2 back a word
977 inc steps ; one more step this time
978 ; leave di and doublesteps where they are
979 dec skips ; keep track of how many times we've done this
jmp shifts_bottom_16
type2_shifts_16:
add di, 2 ; shift di forward a word
dec doublesteps ; reduce the carry steps needed
shifts_bottom_16:
mov si, di ; start with si=di
mov ax, doublesteps
mov carry_steps, ax
dec i
ja top_outer_loop_16
; result is in r
ENDIF
IFDEF BIG16AND32
jmp bottom
ENDIF
IFDEF BIG32
use_32_bit:
.386
; clear r
sub eax, eax ; clear eax
mov cx, rlength ; size of r in bytes
shr cx, 2 ; byte = 1/4 dword
mov di, word ptr r ; load r in es:di for stos
rep stosd ; initialize r to 0
; set variables
mov ax, rlength ; set steps for first loop
sub ax, bnlength
shr ax, 2 ; byte = 1/4 dword
mov steps, ax ; save in steps
mov ax, bnlength
shr ax, 2 ; byte = 1/4 dword
mov i, ax
sub ax, steps
mov skips, ax ; how long to skip over pointer shifts
mov ax, rlength ; set steps for first loop
shr ax, 2 ; byte = 1/4 dword
sub ax, 2 ; only rlength/4-2 steps are really needed
mov doublesteps, ax
mov carry_steps, ax
; prepare segments and offsets for loops
mov di, word ptr r
mov si, di ; both si and di are used here
mov ds, bignum_seg ; load ds
mov ax, word ptr n1 ; load pointers
mov n1p, ax
top_outer_loop_32:
mov ax, word ptr n2 ; set n2p pointer
mov n2p, ax
mov ax, steps ; set inner loop counter
mov j, ax
top_inner_loop_32:
mov bx, n1p
mov eax, ds:[bx]
mov bx, n2p
mul dword ptr ds:[bx]
mov bx, si
add bx, 4 ; increase by size of dword
add ds:[bx-4], eax ; add low dword
adc ds:[bx], edx ; add high dword
jnc no_more_carry_32 ; carry loop not necessary
mov cx, carry_steps ; how many till end of double big number
jcxz no_more_carry_32
add bx, 4 ; move pointer to next dword
; loop until no more carry or until end of r
top_carry_loop_32:
add dword ptr ds:[bx], 1 ; use add, not inc
jnc no_more_carry_32
add bx, 4 ; increase by size of dword
loop top_carry_loop_32
no_more_carry_32:
add n2p, 4 ; increase by dword size
add si, 4
dec carry_steps ; use one less step
dec j
ja top_inner_loop_32
add n1p, 4 ; increase by dword size
cmp skips, 0
je type2_shifts_32
sub word ptr n2, 4 ; shift n2 back a dword
inc steps ; one more step this time
; leave di and doublesteps where they are
dec skips ; keep track of how many times we've done this
980 jmp shifts_bottom_32
981 type2_shifts_32:
982 add di, 4 ; shift di forward a dword
983 dec doublesteps ; reduce the carry steps needed
984 shifts_bottom_32:
985 mov si, di ; start with si=di
986 mov ax, doublesteps
987 mov carry_steps, ax
988
989 dec i
990 ja top_outer_loop_32
991
992 ; result is in r
993 ENDIF
994
995 bottom:
996 .8086
997 pop ds ; restore ds
998 cmp samevar, 1 ; were the variable the same ones?
999 je pos_answer ; if yes, then jump
1000
1001 mov al, sign1 ; is result + or - ?
1002 cmp al, sign2 ; sign(n1) == sign(n2) ?
1003 je pos_answer ; yes
1004 push bnlength ; save bnlength
1005 mov ax, rlength
1006 mov bnlength, ax ; set bnlength = rlength
1007 invoke neg_a_bn, r ; does not affect ES
1008 pop bnlength ; restore bnlength
1009 pos_answer:
1010
1011 mov ax, word ptr r ; return r in ax
1012 ret
1013 unsafe_mult_bn ENDP
1014
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n^2   (full, double wide result)
;
; Because of the symmetry involved, n^2 is much faster than n*n.
; For a bignumber of length l:
;       n*n takes l^2 multiplications
;       n^2 takes (l^2+l)/2 multiplications
; which is about 1/2 of n*n as l gets large.
; Uses the fact that
;       (a+b+c+...)^2 = (a^2+b^2+c^2+...) + 2(ab+ac+bc+...)
; The cross ("middle") terms are accumulated first, the whole of r is
; doubled with double_a_bn, and then the squared terms are added in.
;
; In:   r = offset of the result, n = offset of the operand
;       (both are accessed through bignum_seg)
; Out:  ax = r
; Note: r will be a double wide result, 2*bnlength bytes
; SIDE-EFFECTS: n is changed to its absolute value;
;       bnlength is temporarily doubled around the double_a_bn call;
;       es is left loaded with bignum_seg; ds is restored on exit.
; NOTE(review): rep stos assumes the direction flag is clear on entry -
;       confirm against the calling convention in use.
;
unsafe_full_square_bn PROC USES di si, r:bn_t, n:bn_t
LOCAL i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \
      save_ds:word, \
      rp1: ptr byte, rp2: ptr byte

        mov     save_ds, ds             ; keep the caller's ds; it is needed
                                        ; again to reach bnlength later on
        mov     es, bignum_seg          ; es = bignum segment (sign test, stos)

; By forcing the bignumber to be positive, quite a few multiplies
; are saved (a square is never negative, so no sign needs tracking).

        ; test the sign bit in the most significant byte of n
        mov     bx, word ptr n
        add     bx, bnlength
        mov     al, es:[bx-1]
        and     al, 80h                 ; isolate the sign bit
        jz      already_pos
        invoke  neg_a_bn, n             ; n = -n
already_pos:

; In the loops below the following pointers are used.  All of them are
; dereferenced through ds, which holds bignum_seg while the loops run.
; n1p(di), n2p(si) = the two words/dwords of n being multiplied
; rp1 = position in the double wide result for the outer loop
; rp2 = position in the double wide result for the inner loop
; bx  = position in the double wide result for the carry loop

        mov     cx, bnlength            ; 2*bnlength bytes = bnlength words

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        ; a direct "jae use_32_bit" is not used here; branch around
        ; a near jmp instead
        jb      dont_use_32bit
        jmp     use_32_bit              ; use faster 32 bit code if possible
dont_use_32bit:
ENDIF

IFDEF BIG16
        ; clear r
        sub     ax, ax                  ; clear ax
        ; cx = 2{twice the size}*bnlength/2{bytes per word}
        mov     di, word ptr r          ; es:di -> r for stos
        rep     stosw                   ; initialize r to 0

        ; initialize vars
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 1                   ; byte = 1/2 word
        dec     dx                      ; don't need to do last one
        mov     i, dx                   ; outer loop counter
        mov     steps, dx               ; inner loop length for first pass
        shl     dx, 1                   ; double steps
        sub     dx, 1                   ; only 2*s-1 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx

        ; initialize pointers
        mov     di, word ptr n
        mov     ax, word ptr r
        mov     ds, bignum_seg          ; from here on ds = bignum segment
        add     ax, 2                   ; start with second word of r
        mov     rp1, ax
        mov     rp2, ax                 ; start with rp2=rp1

        cmp     i, 0                    ; if bignumberlength is 2
        je      skip_middle_terms_16    ; there are no cross terms

top_outer_loop_16:
        mov     si, di                  ; set n2p pointer
        add     si, 2                   ; to 1 word beyond n1p(di)
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_16:
        mov     ax, ds:[di]
        mul     word ptr ds:[si]        ; dx:ax = n[di] * n[si]

        mov     bx, rp2
        add     bx, 2                   ; bx -> high word of the target pair
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_16        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of double big number
top_carry_loop_16:
        add     word ptr ds:[bx], 1     ; use add, not inc (inc leaves CF alone)
        jnc     no_more_carry_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_16

no_more_carry_16:
        add     si, 2                   ; increase by word size
        add     rp2, 2
        dec     carry_steps             ; use one less step
        dec     j
        jnz     top_inner_loop_16       ; dec sets ZF but never CF, so test ZF

        add     di, 2                   ; increase by word size
        add     rp1, 4                  ; increase by 2*word size
        mov     ax, rp1
        mov     rp2, ax                 ; start with rp2=rp1

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     steps                   ; use one less step
        dec     i
        jnz     top_outer_loop_16

        ; All the middle terms have been multiplied.  Now double it.
        mov     ds, save_ds             ; restore ds to get bnlength
        shl     bnlength, 1             ; r is a double wide bignumber
        invoke  double_a_bn, r          ; doesn't change es
        shr     bnlength, 1             ; restore bnlength

skip_middle_terms_16:                   ; ds is not necessarily restored here

; Now go back and add in the squared terms.
; In the following loop these pointers are used (all through ds):
; n1p(di) = the word of n being squared
; rp1(si) = position in the double wide result for this square
; bx      = position in the double wide result for the carry loop

        mov     di, word ptr n          ; load n1p pointer in di

        mov     ds, save_ds             ; restore ds to get bnlength
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 1                   ; 1 byte = 1/2 word
        mov     i, dx                   ; loop counter
        shl     dx, 1                   ; double steps

        sub     dx, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx
        mov     si, word ptr r          ; set rp1
        mov     ds, bignum_seg          ; load ds

top_outer_loop_squares_16:
        mov     ax, ds:[di]
        mul     ax                      ; square it

        mov     bx, si
        add     bx, 2                   ; bx -> high word of the target pair
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_squares_16 ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_squares_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of double big number
top_carry_loop_squares_16:
        add     word ptr ds:[bx], 1     ; use add, not inc
        jnc     no_more_carry_squares_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_squares_16

no_more_carry_squares_16:
        add     di, 2                   ; increase by word size
        add     si, 4                   ; increase by 2*word size

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        jnz     top_outer_loop_squares_16

        ; result is in r, a double wide bignumber
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        ; clear r
        sub     eax, eax                ; clear eax
        ; 2{twice the size}*bnlength/4{bytes per dword}
        shr     cx, 1                   ; size of doublebignumber in dwords
        mov     di, word ptr r          ; es:di -> r for stos
        rep     stosd                   ; initialize r to 0

        ; initialize vars
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 2                   ; byte = 1/4 dword
        dec     dx                      ; don't need to do last one
        mov     i, dx                   ; outer loop counter
        mov     steps, dx               ; inner loop length for first pass
        shl     dx, 1                   ; double steps
        sub     dx, 1                   ; only 2*s-1 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx

        ; initialize pointers
        mov     di, word ptr n          ; load n1p pointer
        mov     ax, word ptr r
        mov     ds, bignum_seg          ; from here on ds = bignum segment
        add     ax, 4                   ; start with second dword of r
        mov     rp1, ax
        mov     rp2, ax                 ; start with rp2=rp1

        cmp     i, 0                    ; if bignumberlength is 4
        je      skip_middle_terms_32    ; there are no cross terms

top_outer_loop_32:
        mov     si, di                  ; set n2p pointer
        add     si, 4                   ; to 1 dword beyond n1p(di)
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_32:
        mov     eax, ds:[di]
        mul     dword ptr ds:[si]       ; edx:eax = n[di] * n[si]

        mov     bx, rp2
        add     bx, 4                   ; bx -> high dword of the target pair
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_32        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of double big number
top_carry_loop_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_32

no_more_carry_32:
        add     si, 4                   ; increase by dword size
        add     rp2, 4
        dec     carry_steps             ; use one less step
        dec     j
        jnz     top_inner_loop_32       ; dec sets ZF but never CF, so test ZF

        add     di, 4                   ; increase by dword size
        add     rp1, 8                  ; increase by 2*dword size
        mov     ax, rp1
        mov     rp2, ax                 ; start with rp2=rp1

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     steps                   ; use one less step
        dec     i
        jnz     top_outer_loop_32

        ; All the middle terms have been multiplied.  Now double it.
        mov     ds, save_ds             ; restore ds to get bnlength
        shl     bnlength, 1             ; r is a double wide bignumber
        invoke  double_a_bn, r
        shr     bnlength, 1             ; restore bnlength

skip_middle_terms_32:                   ; ds is not necessarily restored here

; Now go back and add in the squared terms.
; In the following loop these pointers are used (all through ds):
; n1p(di) = the dword of n being squared
; rp1(si) = position in the double wide result for this square
; bx      = position in the double wide result for the carry loop

        mov     di, word ptr n          ; load n1p pointer in di

        mov     ds, save_ds             ; restore ds to get bnlength
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 2                   ; 1 byte = 1/4 dword
        mov     i, dx                   ; loop counter
        shl     dx, 1                   ; double steps

        sub     dx, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx
        mov     si, word ptr r          ; set rp1
        mov     ds, bignum_seg          ; load ds

top_outer_loop_squares_32:
        mov     eax, ds:[di]
        mul     eax                     ; square it

        mov     bx, si
        add     bx, 4                   ; bx -> high dword of the target pair
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_squares_32 ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_squares_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of double big number
top_carry_loop_squares_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_squares_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_squares_32

no_more_carry_squares_32:
        add     di, 4                   ; increase by dword size
        add     si, 8                   ; increase by 2*dword size

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        jnz     top_outer_loop_squares_32

        ; result is in r, a double wide bignumber
ENDIF

bottom:
.8086
        ; since it is a square, the result has to already be positive
        mov     ds, save_ds             ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
unsafe_full_square_bn ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n^2   (result truncated to rlength bytes)
;
; Because of the symmetry involved, n^2 is much faster than n*n.
; For a bignumber of length l:
;       n*n takes l^2 multiplications
;       n^2 takes (l^2+l)/2 multiplications
; which is about 1/2 of n*n as l gets large.
; Uses the fact that
;       (a+b+c+...)^2 = (a^2+b^2+c^2+...) + 2(ab+ac+bc+...)
; The cross ("middle") terms are accumulated first, r is doubled with
; double_a_bn, and then the squared terms are added in.
;
; In:   r = offset of the result, n = offset of the operand
;       (both are accessed through bignum_seg)
; Out:  ax = r
; Note: r will be of length rlength
;       2*bnlength >= rlength > bnlength
;       When rlength == 2*bnlength the work is handed to
;       unsafe_full_square_bn.
; SIDE-EFFECTS: n is changed to its absolute value;
;       bnlength is temporarily set to rlength around the double_a_bn
;       call; ds is restored on exit.
; NOTE(review): rep stos assumes the direction flag is clear on entry -
;       confirm against the calling convention in use.
;
unsafe_square_bn PROC USES di si, r:bn_t, n:bn_t
LOCAL i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \
      skips:word, rodd:word, \
      save_ds:word, \
      n3p: ptr byte, \
      rp1: ptr byte, rp2: ptr byte

; This whole procedure would be a great deal simpler if we could assume that
; rlength < 2*bnlength (that is, not =).  Therefore, we will take the
; easy way out and call unsafe_full_square_bn() if it is.
        mov     ax, rlength
        shr     ax, 1                   ; 1/2 * rlength
        cmp     ax, bnlength            ; 1/2 * rlength == bnlength?
        jne     not_full_square
        invoke  unsafe_full_square_bn, r, n
        ; ax is still loaded with the return value
        jmp     quit_proc               ; we're outa here

not_full_square:
        mov     save_ds, ds             ; keep the caller's ds; it is needed
                                        ; again to reach bnlength/rlength
        mov     es, bignum_seg          ; es = bignum segment (sign test, stos)

; By forcing the bignumber to be positive, quite a few multiplies
; are saved (a square is never negative, so no sign needs tracking).

        ; test the sign bit in the most significant byte of n
        mov     bx, word ptr n          ; load n pointer in es:bx
        add     bx, bnlength
        mov     al, es:[bx-1]
        and     al, 80h                 ; isolate the sign bit
        jz      already_pos
        invoke  neg_a_bn, n             ; n = -n
already_pos:

; In the loops below the following pointers are used.  All of them are
; dereferenced through ds, which holds bignum_seg while the loops run.
; n1p(di), n2p(si) = the two words/dwords of n being multiplied
; n3p = remembered start position for n2p while it is still backing up
; rp1 = position in the result for the outer loop
; rp2 = position in the result for the inner loop
; bx  = position in the result for the carry loop

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        ; a direct "jae use_32_bit" is not used here; branch around
        ; a near jmp instead
        jb      skip_use_32bit
        jmp     use_32_bit              ; use faster 32 bit code if possible
skip_use_32bit:
ENDIF

IFDEF BIG16
        ; clear r
        sub     ax, ax                  ; clear ax
        mov     cx, rlength             ; size of r in bytes
        shr     cx, 1                   ; byte = 1/2 word
        mov     di, word ptr r          ; es:di -> r for stos
        rep     stosw                   ; initialize r to 0

        ; initialize vars

        ; determine whether r starts on an odd or even word of the
        ; full double wide product
        mov     ax, bnlength
        shl     ax, 1                   ; double wide width
        sub     ax, rlength             ; 2*bnlength-rlength
        shr     ax, 1                   ; 1 byte = 1/2 word
        and     ax, 0001h               ; keep only the odd/even bit
        mov     rodd, ax

        mov     ax, bnlength            ; set outer loop counter
        shr     ax, 1                   ; byte = 1/2 word
        dec     ax                      ; don't need to do last one
        mov     i, ax                   ; loop counter

        mov     ax, rlength             ; set steps for first loop
        sub     ax, bnlength
        shr     ax, 1                   ; byte = 1/2 word
        mov     steps, ax               ; save in steps

        mov     dx, bnlength
        shr     dx, 1                   ; bnlength/2
        add     ax, dx                  ; steps+bnlength/2
        sub     ax, 2                   ; steps+bnlength/2-2
        mov     doublesteps, ax
        mov     carry_steps, ax

        mov     ax, i
        sub     ax, steps
        shr     ax, 1                   ; for both words and dwords
        mov     skips, ax               ; how long to skip over pointer shifts

        ; initialize pointers
        mov     di, word ptr n
        mov     si, di
        mov     ax, bnlength
        shr     ax, 1                   ; 1 byte = 1/2 word
        sub     ax, steps
        shl     ax, 1                   ; 1 word = 2 bytes
        add     si, ax                  ; n2p = n1p + 2*(bnlength/2 - steps)
        mov     n3p, si                 ; save for later use
        mov     ax, word ptr r
        mov     ds, bignum_seg          ; from here on ds = bignum segment
        mov     rp1, ax
        mov     rp2, ax                 ; start with rp2=rp1

        cmp     i, 0                    ; if bignumberlength is 2
        ; a direct "je skip_middle_terms_16" is not used here;
        ; branch around a near jmp instead
        jne     top_outer_loop_16
        jmp     skip_middle_terms_16

top_outer_loop_16:
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_16:
        mov     ax, ds:[di]
        mul     word ptr ds:[si]        ; dx:ax = n[di] * n[si]

        mov     bx, rp2
        add     bx, 2                   ; bx -> high word of the target pair
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_16        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of r
        jcxz    no_more_carry_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of r
top_carry_loop_16:
        add     word ptr ds:[bx], 1     ; use add, not inc (inc leaves CF alone)
        jnc     no_more_carry_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_16

no_more_carry_16:
        add     si, 2                   ; increase by word size
        add     rp2, 2
        dec     carry_steps             ; use one less step
        dec     j
        jnz     top_inner_loop_16       ; dec sets ZF but never CF, so test ZF

        add     di, 2                   ; increase by word size

        mov     ax, rodd                ; whether r is on an odd or even word

        ; skips > 0: type 1, skips == 0: type 2, skips < 0: type 3.
        ; The flags of this cmp are tested twice (jle here, jl below).
        cmp     skips, 0
        jle     type2_shifts_16
        sub     n3p, 2                  ; point to previous word
        mov     si, n3p
        inc     steps                   ; one more step this time
        ; leave rp1 and doublesteps where they are
        dec     skips
        jmp     shifts_bottom_16
type2_shifts_16:                        ; only gets executed once
        jl      type3_shifts_16         ; still the flags of "cmp skips, 0"
        sub     steps, ax               ; steps -= (0 or 1)
        inc     ax                      ; ax = 1 or 2 now
        sub     doublesteps, ax         ; decrease double steps by 1 or 2
        shl     ax, 1                   ; 1 word = 2 bytes
        add     rp1, ax                 ; add 1 or 2 words
        mov     si, di
        add     si, 2                   ; si = di + word
        dec     skips                   ; make skips negative
        jmp     shifts_bottom_16
type3_shifts_16:
        dec     steps
        sub     doublesteps, 2
        add     rp1, 4                  ; + two words
        mov     si, di
        add     si, 2                   ; si = di + word
shifts_bottom_16:

        mov     ax, rp1
        mov     rp2, ax                 ; start with rp2=rp1

        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        ; loop while i != 0; branch around a near jmp to reach the top
        jz      not_top_outer_loop_16
        jmp     top_outer_loop_16
not_top_outer_loop_16:

        ; All the middle terms have been multiplied.  Now double it.
        mov     ds, save_ds             ; restore ds to get bnlength
        push    bnlength                ; save bnlength
        mov     ax, rlength
        mov     bnlength, ax            ; r is of length rlength
        invoke  double_a_bn, r
        pop     bnlength

skip_middle_terms_16:                   ; ds is not necessarily restored here
        mov     ds, save_ds             ; bnlength/rlength are read just below

; Now go back and add in the squared terms.
; In the following loop these pointers are used (all through ds):
; n1p(di) = the word of n being squared
; rp1(si) = position in the result for this square
; bx      = position in the result for the carry loop

        ; be careful, the next dozen or so lines are confusing!

        ; determine whether r is on an odd or even word in the number
        mov     ax, bnlength
        shl     ax, 1                   ; double wide width
        sub     ax, rlength             ; 2*bnlength-rlength
        mov     dx, ax                  ; save this for a moment
        and     ax, 0002h               ; 0 or 2: one word if on an odd word

        mov     si, word ptr r          ; load r pointer in ds:si
        add     si, ax                  ; depending on odd or even word

        shr     dx, 1                   ; assumes word size
        inc     dx
        and     dx, 0FFFEh              ; clear the low bit, giving an even value
        mov     di, word ptr n          ; load n1p pointer in di
        add     di, dx                  ; skip the words of n whose squares
                                        ; lie below the start of r

        mov     ax, bnlength
        sub     ax, dx
        shr     ax, 1                   ; 1 byte = 1/2 word
        mov     i, ax

        shl     ax, 1                   ; double steps
        sub     ax, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, ax
        mov     carry_steps, ax

        mov     ds, bignum_seg          ; load ds

top_outer_loop_squares_16:
        mov     ax, ds:[di]
        mul     ax                      ; square it

        mov     bx, si
        add     bx, 2                   ; bx -> high word of the target pair
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_squares_16 ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of r
        jcxz    no_more_carry_squares_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of r
top_carry_loop_squares_16:
        add     word ptr ds:[bx], 1     ; use add, not inc
        jnc     no_more_carry_squares_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_squares_16

no_more_carry_squares_16:
        add     di, 2                   ; increase by word size
        add     si, 4                   ; increase by 2*word size

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        jnz     top_outer_loop_squares_16

        ; result is in r
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        ; clear r
        sub     eax, eax                ; clear eax
        mov     cx, rlength             ; size of r in bytes
        shr     cx, 2                   ; byte = 1/4 dword
        mov     di, word ptr r          ; es:di -> r for stos
        rep     stosd                   ; initialize r to 0

        ; initialize vars

        ; determine whether r starts on an odd or even dword of the
        ; full double wide product
        mov     ax, bnlength
        shl     ax, 1                   ; double wide width
        sub     ax, rlength             ; 2*bnlength-rlength
        shr     ax, 2                   ; 1 byte = 1/4 dword
        and     ax, 0001h               ; keep only the odd/even bit
        mov     rodd, ax

        mov     ax, bnlength            ; set outer loop counter
        shr     ax, 2                   ; byte = 1/4 dword
        dec     ax                      ; don't need to do last one
        mov     i, ax                   ; loop counter

        mov     ax, rlength             ; set steps for first loop
        sub     ax, bnlength
        shr     ax, 2                   ; byte = 1/4 dword
        mov     steps, ax               ; save in steps

        mov     dx, bnlength
        shr     dx, 2                   ; bnlength/4
        add     ax, dx                  ; steps+bnlength/4
        sub     ax, 2                   ; steps+bnlength/4-2
        mov     doublesteps, ax
        mov     carry_steps, ax

        mov     ax, i
        sub     ax, steps
        shr     ax, 1                   ; for both words and dwords
        mov     skips, ax               ; how long to skip over pointer shifts

        ; initialize pointers
        mov     di, word ptr n          ; load n1p pointer
        mov     si, di
        mov     ax, bnlength
        shr     ax, 2                   ; 1 byte = 1/4 dword
        sub     ax, steps
        shl     ax, 2                   ; 1 dword = 4 bytes
        add     si, ax                  ; n2p = n1p + 4*(bnlength/4 - steps)
        mov     n3p, si                 ; save for later use
        mov     ax, word ptr r
        mov     ds, bignum_seg          ; from here on ds = bignum segment
        mov     rp1, ax
        mov     rp2, ax                 ; start with rp2=rp1

        cmp     i, 0                    ; if bignumberlength is 4
        je      skip_middle_terms_32

top_outer_loop_32:
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_32:
        mov     eax, ds:[di]
        mul     dword ptr ds:[si]       ; edx:eax = n[di] * n[si]

        mov     bx, rp2
        add     bx, 4                   ; bx -> high dword of the target pair
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_32        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of r
        jcxz    no_more_carry_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of r
top_carry_loop_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_32

no_more_carry_32:
        add     si, 4                   ; increase by dword size
        add     rp2, 4
        dec     carry_steps             ; use one less step
        dec     j
        jnz     top_inner_loop_32       ; dec sets ZF but never CF, so test ZF

        add     di, 4                   ; increase by dword size

        mov     ax, rodd                ; whether r is on an odd or even dword

        ; skips > 0: type 1, skips == 0: type 2, skips < 0: type 3.
        ; The flags of this cmp are tested twice (jle here, jl below).
        cmp     skips, 0
        jle     type2_shifts_32
        sub     n3p, 4                  ; point to previous dword
        mov     si, n3p
        inc     steps                   ; one more step this time
        ; leave rp1 and doublesteps where they are
        dec     skips
        jmp     shifts_bottom_32
type2_shifts_32:                        ; only gets executed once
        jl      type3_shifts_32         ; still the flags of "cmp skips, 0"
        sub     steps, ax               ; steps -= (0 or 1)
        inc     ax                      ; ax = 1 or 2 now
        sub     doublesteps, ax         ; decrease double steps by 1 or 2
        shl     ax, 2                   ; 1 dword = 4 bytes
        add     rp1, ax                 ; add 1 or 2 dwords
        mov     si, di
        add     si, 4                   ; si = di + dword
        dec     skips                   ; make skips negative
        jmp     shifts_bottom_32
type3_shifts_32:
        dec     steps
        sub     doublesteps, 2
        add     rp1, 8                  ; + two dwords
        mov     si, di
        add     si, 4                   ; si = di + dword
shifts_bottom_32:

        mov     ax, rp1
        mov     rp2, ax                 ; start with rp2=rp1

        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        jnz     top_outer_loop_32

        ; All the middle terms have been multiplied.  Now double it.
        mov     ds, save_ds             ; restore ds to get bnlength
        push    bnlength                ; save bnlength
        mov     ax, rlength
        mov     bnlength, ax            ; r is of length rlength
        invoke  double_a_bn, r
        pop     bnlength

skip_middle_terms_32:                   ; ds is not necessarily restored here
        mov     ds, save_ds             ; bnlength/rlength are read just below

; Now go back and add in the squared terms.
; In the following loop these pointers are used (all through ds):
; n1p(di) = the dword of n being squared
; rp1(si) = position in the result for this square
; bx      = position in the result for the carry loop

        ; be careful, the next dozen or so lines are confusing!

        ; determine whether r is on an odd or even dword in the number
        mov     ax, bnlength
        shl     ax, 1                   ; double wide width
        sub     ax, rlength             ; 2*bnlength-rlength
        mov     dx, ax                  ; save this for a moment
        and     ax, 0004h               ; 0 or 4: one dword if on an odd dword

        mov     si, word ptr r          ; load r pointer in ds:si
        add     si, ax                  ; depending on odd or even dword

        shr     dx, 2                   ; assumes dword size
        inc     dx
        and     dx, 0FFFEh              ; clear the low bit, giving an even value
        shl     dx, 1                   ; scale so dx is a byte offset
        mov     di, word ptr n          ; load n1p pointer in di
        add     di, dx                  ; skip the dwords of n whose squares
                                        ; lie below the start of r

        mov     ax, bnlength
        sub     ax, dx
        shr     ax, 2                   ; 1 byte = 1/4 dword
        mov     i, ax

        shl     ax, 1                   ; double steps
        sub     ax, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, ax
        mov     carry_steps, ax

        mov     ds, bignum_seg          ; load ds

top_outer_loop_squares_32:
        mov     eax, ds:[di]
        mul     eax                     ; square it

        mov     bx, si
        add     bx, 4                   ; bx -> high dword of the target pair
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_squares_32 ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of r
        jcxz    no_more_carry_squares_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of r
top_carry_loop_squares_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_squares_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_squares_32

no_more_carry_squares_32:
        add     di, 4                   ; increase by dword size
        add     si, 8                   ; increase by 2*dword size

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        jnz     top_outer_loop_squares_32

        ; result is in r
ENDIF

bottom:
.8086
        ; since it is a square, the result has to already be positive
        mov     ds, save_ds             ; restore ds
        mov     ax, word ptr r          ; return r in ax

quit_proc:
        ret
unsafe_square_bn ENDP
1411
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n * u   where u is an unsigned integer
;
; Walks n from its least significant end, multiplying each word (or
; dword on a 386) by u and carrying the high half of every product
; into the next position.  The carry out of the last position is
; dropped.
; In:   r, n = offsets accessed through bignum_seg; u = 16 bit multiplier
; Out:  ax = r
mult_bn_int PROC USES di si, r:bn_t, n:bn_t, u:word
LOCAL lu:dword                          ; u widened to 32 bits for the 386 path

        push    ds                      ; ds is repointed at bignum_seg below
        mov     si, WORD PTR n          ; si -> source
        mov     di, WORD PTR r          ; di -> destination
        mov     cx, bnlength            ; length in bytes, scaled per path

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     use_32_bit              ; then take the dword path
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; bignumbers live in this segment

        ; r is fully overwritten, so it needs no clearing

        xor     bx, bx                  ; bx = word carried in (none yet)
        shr     cx, 1                   ; bytes -> words

mul_word_loop:
        mov     ax, ds:[si]             ; next word of n
        mul     u                       ; dx:ax = word * u
        add     ax, bx                  ; fold in the previous high word
        adc     dx, 0                   ; and propagate its carry
        mov     ds:[di], ax             ; low word goes to r
        mov     bx, dx                  ; high word is carried forward

        add     si, 2                   ; advance in n
        add     di, 2                   ; advance in r
        loop    mul_word_loop
ENDIF

IFDEF BIG16AND32
        jmp     mul_done
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; bignumbers live in this segment

        ; r is fully overwritten, so it needs no clearing

        shr     cx, 2                   ; bytes -> dwords
        xor     ebx, ebx                ; ebx = dword carried in (none yet)

        movzx   eax, u                  ; widen u (unsigned int)
        mov     lu, eax                 ; into lu (long unsigned int)

mul_dword_loop:
        mov     eax, ds:[si]            ; next dword of n
        mul     lu                      ; edx:eax = dword * lu
        add     eax, ebx                ; fold in the previous high dword
        adc     edx, 0                  ; and propagate its carry
        mov     ds:[di], eax            ; low dword goes to r
        mov     ebx, edx                ; high dword is carried forward

        add     si, 4                   ; advance in n
        add     di, 4                   ; advance in r
        loop    mul_dword_loop
ENDIF

mul_done:
.8086
        pop     ds                      ; back to the caller's ds
        mov     ax, word ptr r          ; return r in ax
        ret
mult_bn_int ENDP
1487
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r *= u   where u is an unsigned integer
;
; In-place version: each word (or dword on a 386) of r is replaced by
; the low half of its product with u plus the high half carried from
; the position below.  The carry out of the last position is dropped.
; In:   r = offset accessed through bignum_seg; u = 16 bit multiplier
; Out:  ax = r
mult_a_bn_int PROC USES di si, r:bn_t, u:word

        push    ds                      ; ds is repointed at bignum_seg below
        mov     si, WORD PTR r          ; si -> number being scaled
        mov     cx, bnlength            ; length in bytes, scaled per path

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     use_32_bit              ; then take the dword path
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; bignumbers live in this segment
        ; every word of r is rewritten in place
        mov     di, u                   ; keep the multiplier in a register
        xor     bx, bx                  ; bx = word carried in (none yet)
        shr     cx, 1                   ; bytes -> words

scale_word_loop:
        mov     ax, ds:[si]             ; next word of r
        mul     di                      ; dx:ax = word * u
        add     ax, bx                  ; fold in the previous high word
        adc     dx, 0                   ; and propagate its carry
        mov     ds:[si], ax             ; low word back into r
        mov     bx, dx                  ; high word is carried forward

        add     si, 2                   ; advance in r
        loop    scale_word_loop
ENDIF

IFDEF BIG16AND32
        jmp     scale_done
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; bignumbers live in this segment
        ; every dword of r is rewritten in place
        movzx   edi, u                  ; edi = u, upper half cleared
        xor     ebx, ebx                ; ebx = dword carried in (none yet)
        shr     cx, 2                   ; bytes -> dwords

scale_dword_loop:
        mov     eax, ds:[si]            ; next dword of r
        mul     edi                     ; edx:eax = dword * u
        add     eax, ebx                ; fold in the previous high dword
        adc     edx, 0                  ; and propagate its carry
        mov     ds:[si], eax            ; low dword back into r
        mov     ebx, edx                ; high dword is carried forward

        add     si, 4                   ; advance in r
        loop    scale_dword_loop
ENDIF

scale_done:
.8086
        pop     ds                      ; back to the caller's ds
        mov     ax, word ptr r          ; return r in ax
        ret
mult_a_bn_int ENDP
1554
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n / u   where u is an unsigned integer
;
; The sign of n is remembered and n is made positive.  The magnitude is
; then divided from the most significant word (or dword on a 386)
; downward, each remainder becoming the high half of the next dividend.
; Finally r is negated if n was negative.
;
; In:   r, n = offsets accessed through bignum_seg; u = 16 bit divisor
; Out:  ax = r
; SIDE-EFFECTS: n is changed to its absolute value.
; Division by zero: instead of letting the div instruction fault,
;       r is set with max_bn and then given the sign of n.
unsafe_div_bn_int PROC USES di si, r:bn_t, n:bn_t, u:word
LOCAL sign:byte

        push    ds                      ; ds is repointed at bignum_seg below

        ; check for sign bits
        mov     bx, WORD PTR n
        mov     es, bignum_seg          ; load n pointer es:bx
        add     bx, bnlength
        mov     al, es:[bx-1]           ; most significant byte of n
        and     al, 80h                 ; isolate the sign bit
        mov     sign, al                ; nonzero means n was negative
        jz      already_pos
        invoke  neg_a_bn, n             ; n = -n
already_pos:

        ; a zero divisor would raise a divide error in the loops below
        cmp     u, 0
        jne     divisor_ok
        invoke  max_bn, r               ; saturate the quotient
        jmp     bottom                  ; still restores ds and applies sign
divisor_ok:

        mov     cx, bnlength            ; set outer loop counter
        mov     di, word ptr r
        mov     si, word ptr n          ; load pointers ds:si
        ; past most significant portion of the number
        add     si, cx
        add     di, cx

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        jae     use_32_bit              ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds

        ; no need to clear r here, values get mov'ed, not add'ed
        shr     cx, 1                   ; byte = 1/2 word
        mov     bx, u

        ; need to start with most significant portion of the number
        sub     si, 2                   ; most sig word
        sub     di, 2                   ; most sig word

        sub     dx, dx                  ; clear dx register
                                        ; for first time through loop
top_loop_16:
        mov     ax, ds:[si]             ; load next word from n
        div     bx                      ; dx:ax / u; dx < u, so it cannot overflow
        mov     ds:[di], ax             ; store low word
                                        ; leave remainder in dx

        sub     si, 2                   ; next word in n
        sub     di, 2                   ; next word in r
        loop    top_loop_16
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds

        ; no need to clear r here, values get mov'ed, not add'ed
        shr     cx, 2                   ; byte = 1/4 dword
        sub     ebx, ebx                ; clear upper word of ebx
        mov     bx, u

        ; need to start with most significant portion of the number
        sub     si, 4                   ; most sig dword
        sub     di, 4                   ; most sig dword

        sub     edx, edx                ; clear edx register
                                        ; for first time through loop
top_loop_32:
        mov     eax, ds:[si]            ; load next dword from n
        div     ebx                     ; edx:eax / u; edx < u, so it cannot overflow
        mov     ds:[di], eax            ; store low dword
                                        ; leave remainder in edx

        sub     si, 4                   ; next dword in n
        sub     di, 4                   ; next dword in r
        loop    top_loop_32
ENDIF

bottom:
.8086

        pop     ds                      ; restore ds

        cmp     sign, 0                 ; is result + or - ?
        je      pos_answer              ; n was positive, leave r alone
        invoke  neg_a_bn, r             ; does not affect ES
pos_answer:

        mov     ax, word ptr r          ; return r in ax
        ret
unsafe_div_bn_int ENDP
1652
1653 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1654 ; r /= u where u is an unsigned integer
div_a_bn_int PROC USES si, r:bn_t, u:word
LOCAL   sign:byte
; r /= u, in-place long division of a bignumber by an unsigned 16 bit integer.
;   in:   r  = offset of the bignumber (addressed through bignum_seg)
;         u  = unsigned divisor
;   out:  ax = r
; r is made positive if necessary, divided from its most significant end
; downwards with the running remainder kept in dx (edx), and finally
; negated again if it was negative on entry.

        push    ds                      ; ds is repointed at bignum_seg below

        ; note the sign of r, then work on its magnitude
        mov     es, bignum_seg
        mov     bx, word ptr r
        add     bx, bnlength            ; es:bx = one past the last byte of r
        mov     al, es:[bx-1]           ; most significant byte of r
        and     al, 80h                 ; keep only the sign bit
        mov     sign, al                ; mov leaves the flags of "and" intact
        jz      magnitude_ready
        invoke  neg_a_bn, r
magnitude_ready:

        mov     si, word ptr r
        mov     cx, bnlength            ; byte count, becomes the loop count
        add     si, cx                  ; si = one past the end of r

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     divide_by_dwords        ; yes, take the 32 bit path
ENDIF

IFDEF BIG16
        mov     bx, u                   ; divisor
        mov     ds, bignum_seg          ; r is read/written through ds

        shr     cx, 1                   ; bytes -> words
        xor     dx, dx                  ; nothing carried into the first divide
        dec     si
        dec     si                      ; ds:si = most significant word of r

div_word:
        mov     ax, ds:[si]             ; dx:ax = remainder : next word of r
        div     bx                      ; ax = quotient word, dx = remainder
        mov     ds:[si], ax             ; quotient replaces the word just read
        dec     si
        dec     si                      ; step down one word
        loop    div_word
ENDIF

IFDEF BIG16AND32
        jmp     divide_done
ENDIF

IFDEF BIG32
divide_by_dwords:
.386
        mov     ds, bignum_seg          ; r is read/written through ds

        shr     cx, 2                   ; bytes -> dwords
        movzx   ebx, u                  ; divisor, upper word of ebx zero
        xor     edx, edx                ; nothing carried into the first divide
        sub     si, 4                   ; ds:si = most significant dword of r

div_dword:
        mov     eax, ds:[si]            ; edx:eax = remainder : next dword of r
        div     ebx                     ; eax = quotient dword, edx = remainder
        mov     ds:[si], eax            ; quotient replaces the dword just read
        sub     si, 4                   ; step down one dword
        loop    div_dword
ENDIF

divide_done:
.8086
        pop     ds                      ; back to the caller's data segment

        cmp     sign, 0                 ; was r negative on entry?
        je      sign_applied            ; no, nothing more to do
        invoke  neg_a_bn, r
sign_applied:

        mov     ax, word ptr r          ; return r in ax
        ret
div_a_bn_int ENDP
1744
1745 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1746 ; bf_t routines
1747 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1748 ; r = 0 (just like clear_bn() but loads bflength+2 instead of bnlength)
clear_bf PROC USES di, r:bf_t
; r = 0 for a big float: zero fills the bflength mantissa bytes and the
; 2 byte exponent that follows them.
;   in:   r  = offset of the big float (addressed through bignum_seg)
;   out:  ax = r

        mov     es, bignum_seg
        mov     di, word ptr r          ; es:di = start of r
        mov     cx, bflength            ; mantissa size in bytes

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     short fill_dwords       ; yes, take the 32 bit path
ENDIF

IFDEF BIG16
        shr     cx, 1                   ; mantissa bytes -> words
        inc     cx                      ; one more word for the exponent
        xor     ax, ax                  ; fill value
        rep     stosw                   ; zero mantissa and exponent
ENDIF

IFDEF BIG16AND32
        jmp     fill_done
ENDIF

IFDEF BIG32
fill_dwords:
.386
        shr     cx, 2                   ; mantissa bytes -> dwords
        xor     eax, eax                ; fill value
        rep     stosd                   ; zero the mantissa
        stosw                           ; zero the exponent word
ENDIF

fill_done:
.8086
        mov     ax, word ptr r          ; return r in ax
        ret

clear_bf ENDP
1786
1787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1788 ; r = n
copy_bf PROC USES di si, r:bf_t, n:bf_t
; r = n for big floats: copies the bflength mantissa bytes and the 2 byte
; exponent that follows them.
;   in:   r, n = offsets of the big floats (addressed through bignum_seg)
;   out:  ax   = r
;
; cx is loaded with bflength+2, so it already covers the exponent.  The
; 16 bit path used to add one more word after halving cx, which copied
; bflength+4 bytes and ran one word past the end of both n and r.  It now
; copies exactly (bflength+2)/2 words, the same bflength+2 bytes that the
; 32 bit path and clear_bf handle.

        mov     ax, ds                  ; keep the caller's ds in ax
        mov     cx, bflength
        add     cx, 2                   ; mantissa bytes + exponent word
        mov     di, word ptr r
        mov     es, bignum_seg          ; es:di = destination
        mov     si, word ptr n

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     short use_32_bit        ; yes, take the 32 bit path
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; ds:si = source for movs

        shr     cx, 1                   ; bytes -> words, exponent included
        rep     movsw                   ; copy mantissa and exponent
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; ds:si = source for movs

        ; (bflength+2) shr 2 equals bflength/4 when bflength is a multiple
        ; of 4 -- presumably guaranteed on this path; confirm against the
        ; code that sets bflength
        shr     cx, 2                   ; mantissa bytes -> dwords
        rep     movsd                   ; copy the mantissa
        movsw                           ; copy the exponent word
ENDIF

bottom:
.8086
        mov     ds, ax                  ; restore the caller's ds
        mov     ax, word ptr r          ; return r in ax
        ret

copy_bf ENDP
1832
1833 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1834 ; LDBL bftofloat(bf_t n);
1835 ; converts a bf number to a 10 byte real
1836 ;
bftofloat PROC USES di si, n:bf_t
LOCAL   value[11]:BYTE                  ; 1 rounding byte + 10 byte real
; Converts the big float n to a 10 byte real, returned on the fpu stack.
;   in:   n = offset of the big float (addressed through bignum_seg)
;   out:  st(0) = value of n
;
; Work area layout:
;   value[0]     guard byte, only looked at when rounding
;   value[1..8]  64 bit mantissa of the result
;   value[9..10] sign and biased exponent of the result
; Up to 9 mantissa bytes lying just below the top byte of n are copied so
; that the highest one lands in value[8].  The top byte of n supplies the
; sign and the word after the mantissa supplies the base 256 exponent.
;
; NOTE(review): if n is negative and all copied bytes are zero, the
; negation below yields zero and 0.0 is returned; bntofloat special-cases
; the analogous pattern.  Confirm such a mantissa cannot reach this routine.

        mov     ax, ds                  ; ax holds the caller's ds until exit

        ; cx = movebytes = bflength-1, but never more than 9
        mov     cx, bflength
        cmp     cx, 10
        jb      count_chosen
        mov     cx, 10
count_chosen:
        dec     cx

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jb      stay_16_bit             ; (target is out of short jump range)
        jmp     use_dwords              ; yes, take the 32 bit path
stay_16_bit:
ENDIF

IFDEF BIG16
        ; zero the whole work area
        xor     bx, bx
        mov     word ptr value[0], bx
        mov     word ptr value[2], bx
        mov     word ptr value[4], bx
        mov     word ptr value[6], bx
        mov     word ptr value[8], bx
        mov     byte ptr value[10], bl

        ; copy movebytes bytes of n so that they end at value[8]
        mov     dx, ss
        mov     es, dx
        lea     di, value+9
        sub     di, cx                  ; es:di = value+9-movebytes
        mov     si, word ptr n
        add     si, bflength
        dec     si
        sub     si, cx                  ; si = n+bflength-1-movebytes
        mov     ds, bignum_seg          ; ds:si = first byte to copy
        rep     movsb
        mov     bl, ds:[si]             ; si stopped on the top (sign) byte
        mov     dx, ds:[si+1]           ; the exponent follows it
        mov     cl, 3
        shl     dx, cl                  ; to a power of two: 256^e = 2^(8e)

        ; take the magnitude of a negative mantissa
        and     bl, 10000000b           ; bl = sign bit only
        jz      magnitude_16
        neg     word ptr value[0]       ; two's complement of the 9 bytes;
        cmc                             ; carry on only if the low word was 0
        not     word ptr value[2]
        adc     word ptr value[2], 0
        not     word ptr value[4]
        adc     word ptr value[4], 0
        not     word ptr value[6]
        adc     word ptr value[6], 0
        not     byte ptr value[8]       ; the ninth byte stands alone
        adc     byte ptr value[8], 0
magnitude_16:

        cmp     byte ptr value[8], 0    ; top mantissa byte empty?
        jnz     normalize_16
        fldz                            ; treat the number as zero
        jmp     restore_and_exit

        ; shift left until the most significant bit is set
normalize_16:
        test    byte ptr value[8], 10000000b
        jnz     round_16
        dec     dx                      ; each doubling costs one exponent step
        shl     word ptr value[0], 1
        rcl     word ptr value[2], 1
        rcl     word ptr value[4], 1
        rcl     word ptr value[6], 1
        rcl     byte ptr value[8], 1    ; the ninth byte stands alone
        jmp     normalize_16
round_16:

        ; round on the guard byte
        cmp     byte ptr value[0], 80h
        jnb     round_up_16
        jmp     pack_result             ; below one half, nothing to add
round_up_16:
        add     word ptr value[1], 1
        adc     word ptr value[3], 0
        adc     word ptr value[5], 0
        adc     word ptr value[7], 0
        jc      carried_out_16
        jmp     pack_result
carried_out_16:
        ; the mantissa was all ones and rounded up to 1 followed by zeros,
        ; the 1 having moved out into the carry flag
ENDIF

IFDEF BIG16AND32
        jmp     rounded_past_end
ENDIF

IFDEF BIG32
use_dwords:
.386
        ; zero the whole work area
        mov     dword ptr value[0], 0
        mov     dword ptr value[4], 0
        mov     word ptr value[8], 0
        mov     byte ptr value[10], 0

        ; copy movebytes bytes of n so that they end at value[8]
        mov     dx, ss
        mov     es, dx
        lea     di, value+9
        sub     di, cx                  ; es:di = value+9-movebytes
        mov     si, word ptr n
        add     si, bflength
        dec     si
        sub     si, cx                  ; si = n+bflength-1-movebytes
        mov     ds, bignum_seg          ; ds:si = first byte to copy
        rep     movsb
        mov     bl, ds:[si]             ; si stopped on the top (sign) byte
        mov     dx, ds:[si+1]           ; the exponent follows it
        shl     dx, 3                   ; to a power of two: 256^e = 2^(8e)

        ; take the magnitude of a negative mantissa
        and     bl, 10000000b           ; bl = sign bit only
        jz      magnitude_32
        neg     dword ptr value[0]      ; two's complement of the 9 bytes;
        cmc                             ; carry on only if the low dword was 0
        not     dword ptr value[4]
        adc     dword ptr value[4], 0
        not     byte ptr value[8]       ; the ninth byte stands alone
        adc     byte ptr value[8], 0
magnitude_32:

        cmp     byte ptr value[8], 0    ; top mantissa byte empty?
        jnz     normalize_32
        fldz                            ; treat the number as zero
        jmp     restore_and_exit

        ; shift left until the most significant bit is set
normalize_32:
        test    byte ptr value[8], 10000000b
        jnz     round_32
        dec     dx                      ; each doubling costs one exponent step
        shl     dword ptr value[0], 1
        rcl     dword ptr value[4], 1
        rcl     byte ptr value[8], 1    ; the ninth byte stands alone
        jmp     normalize_32
round_32:

        ; round on the guard byte
        cmp     byte ptr value[0], 80h
        jb      pack_result             ; below one half, nothing to add
        add     dword ptr value[1], 1
        adc     dword ptr value[5], 0
        jnc     pack_result

        ; the mantissa was all ones and rounded up to 1 followed by zeros,
        ; the 1 having moved out into the carry flag
ENDIF

rounded_past_end:
.8086                                   ; shared with the 16 bit code
        mov     byte ptr value[8], 10000000b    ; put the lost 1 back on top
        inc     dx                      ; and account for it in the exponent

pack_result:
        add     dx, 3FFFh+7             ; add the bias; +7 because the top bit
                                        ; of value[8] has weight 2^7
        or      dh, bl                  ; merge in the sign bit
        mov     word ptr value[9], dx

        ; a long double is returned on the fpu stack
        fld     real10 ptr value[1]
restore_and_exit:
        mov     ds, ax                  ; restore the caller's ds
        ret

bftofloat endp
2022
2023 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; bf_t floattobf(bf_t n, LDBL f);
2025 ; converts a 10 byte real to a bf number
2026 ;
floattobf PROC USES di si, n:bf_t, f:REAL10
LOCAL   value[9]:BYTE                   ; 1 guard byte + 8 mantissa bytes
; Converts the 10 byte real f to the big float n.
;   in:   n = offset of the destination big float (through bignum_seg)
;         f = 10 byte real
;   out:  ax = word ptr n, dx = word ptr n+2 (kept exactly as before)
;
; n is cleared, then the 64 bit mantissa of f is shifted right until its
; binary exponent is a multiple of 8, the top movebytes bytes are stored
; just below the top byte of n, exponent/8 goes into the exponent word and
; n is negated if f is negative.
;
; Fix: bignum_seg and bflength are now read while ds still addresses the
; data segment.  They used to be read after ds had been loaded from ss,
; which only works when SS equals the data segment.

        invoke  clear_bf, n

        ; a zero top mantissa byte is taken to mean f == 0
        cmp     byte ptr f[7], 0
        jnz     have_value
        jmp     done                    ; n is already zero
have_value:

        ; cx = movebytes = bflength-1, but never more than 9
        mov     cx, bflength
        cmp     cx, 10
        jb      count_chosen
        mov     cx, 10
count_chosen:
        dec     cx

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     use_dwords              ; yes, take the 32 bit path
ENDIF

IFDEF BIG16
        ; binary exponent of f, adjusted so value[8] counts as an integer
        mov     dx, word ptr f[8]
        and     dx, 7FFFh               ; drop the sign bit
        sub     dx, 3FFFh+7             ; remove the bias, top bit weight 2^7

        ; value[1..8] = mantissa of f, value[0] = guard byte
        mov     byte ptr value[0], 0
        mov     ax, word ptr f[6]
        mov     word ptr value[7], ax
        mov     ax, word ptr f[4]
        mov     word ptr value[5], ax
        mov     ax, word ptr f[2]
        mov     word ptr value[3], ax
        mov     ax, word ptr f[0]
        mov     word ptr value[1], ax

        ; shift right until the exponent is a multiple of 8 (2^(8n) = 256^n)
align_16:
        test    dx, 111b                ; exponent mod 8
        jz      store_mantissa
        shr     word ptr value[7], 1    ; halve the 9 byte number
        rcr     word ptr value[5], 1
        rcr     word ptr value[3], 1
        rcr     word ptr value[1], 1
        rcr     byte ptr value[0], 1    ; the guard byte stands alone
        inc     dx                      ; and raise the exponent to match
        jmp     align_16
ENDIF

IFDEF BIG32
use_dwords:
.386
        ; binary exponent of f, adjusted so value[8] counts as an integer
        mov     dx, word ptr f[8]
        and     dx, 7FFFh               ; drop the sign bit
        sub     dx, 3FFFh+7             ; remove the bias, top bit weight 2^7

        ; value[1..8] = mantissa of f, value[0] = guard byte
        mov     byte ptr value[0], 0
        mov     eax, dword ptr f[4]
        mov     dword ptr value[5], eax
        mov     eax, dword ptr f[0]
        mov     dword ptr value[1], eax

        ; shift right until the exponent is a multiple of 8 (2^(8n) = 256^n)
align_32:
        test    dx, 111b                ; exponent mod 8
        jz      store_mantissa
        shr     dword ptr value[5], 1   ; halve the 9 byte number
        rcr     dword ptr value[1], 1
        rcr     byte ptr value[0], 1    ; the guard byte stands alone
        inc     dx                      ; and raise the exponent to match
        jmp     align_32
ENDIF

store_mantissa:
.8086
        ; The guard byte is not rounded: it would only matter when
        ; bflength < 9, and then only in the last bit.

        ; destination first, while ds still addresses bignum_seg/bflength
        mov     es, bignum_seg
        mov     di, word ptr n
        add     di, bflength
        dec     di
        sub     di, cx                  ; es:di = n+bflength-1-movebytes

        ; source is the top movebytes bytes of value, which live on the stack
        lea     si, value+9
        sub     si, cx
        mov     ax, ds                  ; keep ds in ax
        mov     bx, ss
        mov     ds, bx                  ; ds:si = value+9-movebytes
        rep     movsb
        mov     ds, ax                  ; restore ds

        ; di stopped on the top byte of n, left zero by clear_bf; the
        ; exponent word is the byte after it
        inc     di
        mov     cl, 3
        sar     dx, cl                  ; binary exponent / 8 = base 256 exponent
        mov     word ptr es:[di], dx

        ; apply the sign of f
        test    byte ptr f[9], 10000000b
        jz      done
        invoke  neg_a_bf, n
done:
        mov     ax, word ptr n
        mov     dx, word ptr n+2        ; return n in dx:ax
        ret
floattobf endp
2093
2094 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; LDBL bntofloat(bn_t n);
2096 ; converts a bn number to a 10 byte real
2097 ; (the most speed critical of these to/from float routines)
bntofloat PROC USES di si, n:bn_t
LOCAL   value[11]:BYTE                  ; 1 spare byte + 10 byte real
; Converts the bignumber n to a 10 byte real, returned on the fpu stack.
;   in:   n = offset of the bignumber (addressed through bignum_seg)
;   out:  st(0) = value of n
;
; Leading bytes that only repeat the sign (00 or FF) are skipped, then up
; to 9 bytes ending at the most significant remaining byte are copied so
; that this byte lands in value[8].  The copy is negated if n is negative,
; shifted left until bit 7 of value[8] is set and packed together with
; the exponent into value[1..10].  No rounding is done; speed matters more.
;
; Fix: the number of bytes taken below the top significant byte is now
; limited to the bytes that exist.  The limit used to be applied to the
; significant byte count itself, so with 8 or fewer significant bytes the
; byte in front of n[0] was copied too and ended up in the mantissa.

        ; find the most significant byte that is not pure sign extension
        mov     es, bignum_seg
        mov     si, word ptr n
        add     si, bnlength
        dec     si                      ; es:si = top byte of n
        mov     bl, es:[si]             ; bl = top byte, kept for the sign
        mov     cx, bnlength            ; all bytes are significant so far
        cmp     bl, 0
        je      skip_sign_bytes
        cmp     bl, 0FFh
        jne     sig_bytes_determined    ; top byte itself is significant

skip_sign_bytes:
        dec     cx                      ; the top byte does not count
next_lower_byte:
        dec     si
        cmp     es:[si], bl             ; still the same as the top byte?
        jne     sig_bytes_determined    ; no, es:si is the top significant byte
        loop    next_lower_byte

        ; every byte equals the top byte: n is 0, or all FF which is the
        ; negative number closest to zero
        cmp     bl, 0
        jnz     all_ones
        fldz                            ; return zero
        jmp     return
all_ones:
        ; build -(2^(8*(intlength-bnlength))) directly
        mov     ax, intlength
        sub     ax, bnlength
        mov     cl, 3
        shl     ax, cl                  ; bytes -> bits, 256^n = 2^(8n)
        add     ax, 3FFFh+0             ; bias, no further adjustment
        or      ah, 10000000b           ; negative
        mov     word ptr value[9], ax   ; sign and exponent
        mov     word ptr value[7], 8000h ; mantissa = 1 in the top bit
        mov     word ptr value[1], 0    ; rest of the mantissa
        mov     word ptr value[3], 0
        mov     word ptr value[5], 0

        fld     real10 ptr value[1]
        jmp     return

sig_bytes_determined:
        ; here cx = number of significant bytes (n[0]..n[cx-1]) and
        ; es:si = n[cx-1]
        mov     dx, cx                  ; keep the count for the exponent
        dec     cx                      ; bytes that exist below n[cx-1]
        cmp     cx, 9-1
        jb      set_movebytes
        mov     cx, 9-1                 ; take at most 8 of them
set_movebytes:
        sub     si, cx                  ; si = first byte to copy, never below n
        inc     cx                      ; bytes to copy, 1..9

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jb      stay_16_bit             ; (target is out of short jump range)
        jmp     use_dwords              ; yes, take the 32 bit path
stay_16_bit:
ENDIF

IFDEF BIG16
        ; zero the whole work area
        xor     ax, ax
        mov     word ptr value[0], ax
        mov     word ptr value[2], ax
        mov     word ptr value[4], ax
        mov     word ptr value[6], ax
        mov     word ptr value[8], ax
        mov     byte ptr value[10], al

        ; copy the selected bytes of n so that they end at value[8]
        mov     ax, ss
        mov     es, ax
        lea     di, value+9
        sub     di, cx                  ; es:di = value+9-count
        mov     ax, ds                  ; keep ds in ax
        mov     ds, bignum_seg          ; ds:si = first byte to copy
        rep     movsb
        mov     ds, ax                  ; restore ds

        ; take the magnitude of a negative number
        xor     ax, ax                  ; ax = 1 flags the special case below
        and     bl, 10000000b           ; bl = sign bit only
        jz      magnitude_16
        neg     word ptr value[0]       ; two's complement of the 9 bytes;
        cmc                             ; carry on only if the low word was 0
        not     word ptr value[2]
        adc     word ptr value[2], 0
        not     word ptr value[4]
        adc     word ptr value[4], 0
        not     word ptr value[6]
        adc     word ptr value[6], 0
        not     byte ptr value[8]       ; the ninth byte stands alone
        adc     byte ptr value[8], 0
        jnc     magnitude_16            ; the usual case
        ; n was FFFF...0000...: the magnitude is a 1 just above value[8]
        mov     byte ptr value[8], 10000000b
        inc     ax                      ; worth one extra exponent step

magnitude_16:
        sub     dx, bnlength
        add     dx, intlength           ; position relative to the radix point
        mov     cl, 3
        shl     dx, cl                  ; bytes -> bits, 256^n = 2^(8n)
        add     dx, ax                  ; special case adjustment

        ; shift left until the most significant bit is set
normalize_16:
        test    byte ptr value[8], 10000000b
        jz      shift_16
        jmp     pack_result
shift_16:
        dec     dx                      ; each doubling costs one exponent step
        shl     word ptr value[0], 1
        rcl     word ptr value[2], 1
        rcl     word ptr value[4], 1
        rcl     word ptr value[6], 1
        rcl     byte ptr value[8], 1    ; the ninth byte stands alone
        jmp     normalize_16
ENDIF

IFDEF BIG32
use_dwords:
.386
        ; zero the whole work area
        mov     dword ptr value[0], 0
        mov     dword ptr value[4], 0
        mov     word ptr value[8], 0
        mov     byte ptr value[10], 0

        ; copy the selected bytes of n so that they end at value[8]
        mov     ax, ss
        mov     es, ax
        lea     di, value+9
        sub     di, cx                  ; es:di = value+9-count
        mov     ax, ds                  ; keep ds in ax
        mov     ds, bignum_seg          ; ds:si = first byte to copy
        rep     movsb
        mov     ds, ax                  ; restore ds

        ; take the magnitude of a negative number
        xor     ax, ax                  ; ax = 1 flags the special case below
        and     bl, 10000000b           ; bl = sign bit only
        jz      magnitude_32
        neg     dword ptr value[0]      ; two's complement of the 9 bytes;
        cmc                             ; carry on only if the low dword was 0
        not     dword ptr value[4]
        adc     dword ptr value[4], 0
        not     byte ptr value[8]       ; the ninth byte stands alone
        adc     byte ptr value[8], 0
        jnc     magnitude_32            ; the usual case
        ; n was FFFF...0000...: the magnitude is a 1 just above value[8]
        mov     byte ptr value[8], 10000000b
        inc     ax                      ; worth one extra exponent step

magnitude_32:
        sub     dx, bnlength
        add     dx, intlength           ; position relative to the radix point
        shl     dx, 3                   ; bytes -> bits, 256^n = 2^(8n)
        add     dx, ax                  ; special case adjustment

        ; shift left until the most significant bit is set
normalize_32:
        test    byte ptr value[8], 10000000b
        jnz     pack_result
        dec     dx                      ; each doubling costs one exponent step
        shl     dword ptr value[0], 1
        rcl     dword ptr value[4], 1
        rcl     byte ptr value[8], 1    ; the ninth byte stands alone
        jmp     normalize_32
ENDIF

pack_result:
.8086
        add     dx, 3FFFh+7-8           ; add the bias, with adjustment
        or      dh, bl                  ; merge in the sign bit
        mov     word ptr value[9], dx

        ; a long double is returned on the fpu stack
        fld     real10 ptr value[1]
return:
        ret

bntofloat endp
2239
2240 ;
2241 ; LDBL floattobn(bf_t n, LDBL f) is in BIGNUM.C
2242 ;
2243
2244 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2245 ; These last two functions do not use bignum type numbers, but take
2246 ; long doubles as arguments. These routines are called by the C code.
2247 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2248
2249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2250 ; LDBL extract_256(LDBL f, int *exp_ptr)
2251 ;
; extracts the mantissa and exponent of f
2253 ; finds m and n such that 1<=|m|<256 and f = m*256^n
2254 ; n is stored in *exp_ptr and m is returned, sort of like frexp()
2255
extract_256 PROC f:real10, exp_ptr: ptr sword
local   expon:sword, exf:real10, tmp_word:word
; Splits f into m and n with f = m*256^n and 1 <= |m| < 256, much like
; frexp() but in base 256.
;   in:   f       = value to split
;         exp_ptr = where n is stored
;   out:  st(0)   = m (f itself, with *exp_ptr = 0, when f tests as zero)
; With a coprocessor (fpu != 0) fxtract does the work.  Otherwise the
; 80 bit image of f is edited directly, because fxtract is not emulated
; by the MS floating point library.

        fld     f                       ; st: f
        ftst                            ; compare f with 0.0
        fstsw   tmp_word
        fwait
        mov     ax, tmp_word
        sahf                            ; zf set when f compared equal to zero
        jnz     have_value

        mov     bx, exp_ptr
        mov     word ptr [bx], 0        ; *exp_ptr = 0
        ret                             ; the zero is already on the fpu stack

have_value:
        cmp     fpu, 0
        je      by_hand                 ; no coprocessor

        ; coprocessor path, f is still on the fpu stack
        fxtract                         ; st: mant exp, f = mant*2^exp
        fxch                            ; st: exp mant
        fistp   expon                   ; st: mant
        fwait
        mov     dx, expon               ; dx = binary exponent, for n
        mov     ax, dx

        ; exp mod 8: the low three bits are already right for a negative
        ; two's complement exponent, so no +8 correction is needed
        and     ax, 7
        jz      store_exponent          ; nothing to scale by
        mov     expon, ax               ; expon reused as a temporary
        fild    expon                   ; st: exp%8 mant
        fxch                            ; st: mant exp%8
        fscale                          ; st: mant*2^(exp%8) exp%8
        fstp    st(1)                   ; st: mant*2^(exp%8)
store_exponent:
        mov     cl, 3
        sar     dx, cl                  ; n = exp / 8, rounded down
        mov     bx, exp_ptr
        mov     [bx], dx                ; *exp_ptr = n
        fwait
        ret

by_hand:
        ; same result by editing the 80 bit format directly
        fstp    exf                     ; f is still on the fpu stack
        mov     ax, word ptr exf+8      ; sign and biased exponent
        mov     dx, ax
        and     dx, 8000h               ; dx = sign bit
        or      dx, 3FFFh               ; plus the exponent of 1 <= |x| < 2
        and     ax, 7FFFh               ; ax = biased exponent
        sub     ax, 3FFFh               ; biased -> unbiased
        mov     bx, ax
        and     bx, 7                   ; exp mod 8, also right for exp < 0
        add     dx, bx                  ; new exponent: 2^(exp mod 8) scale
        mov     word ptr exf+8, dx      ; exf now holds m
        mov     cl, 3
        sar     ax, cl                  ; n = exp / 8, 2^(8n) = 256^n
        mov     bx, exp_ptr
        mov     [bx], ax                ; *exp_ptr = n
        fld     exf                     ; return m on the fpu stack
        ret
extract_256 ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; LDBL scale_256( LDBL f, int n );
; calculates and returns the value of f*256^n
; sort of like ldexp()
;
; n must be in the range -2^12 <= n < 2^12 (2^12=4096),
; which should not be a problem
scale_256 PROC f:real10, n: sword
; Returns f*256^n on the fpu stack, much like ldexp() but in base 256.
;   in:   f = value to scale
;         n = power of 256; the header above this routine limits it to
;             -2^12 <= n < 2^12
;   out:  st(0) = f*256^n
; As before, the stack copy of n is overwritten with 8n on the way.

        mov     ax, n
        or      ax, ax                  ; scaling by 256^0?
        jz      unscaled

        mov     cl, 3
        shl     ax, cl                  ; 8n, since 256^n = 2^(8n)
        mov     n, ax
        fild    n                       ; st: 8n
        fld     f                       ; st: f 8n
        ; 8n stays inside the fscale range limits of the 8087/287
        fscale                          ; st: f*2^(8n) 8n
        fstp    st(1)                   ; st: f*2^(8n)
        ret

unscaled:
        fld     f                       ; nothing to scale, return f itself
        ret
scale_256 ENDP
END