; C:\Data\fractdev\fractint\fractint\dos\bignuma.asm

; File: dos\bignuma.asm

; bignuma.asm

; based on:
; bbignuma.asm - asm routines for bignumbers
; Wesley Loewer's Big Numbers.        (C) 1994-95, Wesley B. Loewer
; based pointer version

; See BIGLIB.TXT for further documentation.

; general programming notes for based pointer version
; ALL big_t pointers must have a segment value equal to bignum_seg.
; single arg procedures, p(r), r = bx (or si when required)
; two arg procedures,    p(r,n), r=di, n=bx(or si when required)
; two arg procedures,    p(n1,n2), n1=bx(or si when required), n2=di
; three arg proc,        p(r,n1,n2), r=di, n1=si, n2=bx
; unless otherwise noted, such as full_mult, mult, full_square, square

.MODEL medium, c

include big.inc
include bigport.inc

.DATA

.CODE
.8086

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; clear_bn - r = 0
; Stores zero into the bnlength bytes at bignum_seg:r.
; RETURNS: the near offset r in ax
clear_bn   PROC USES di, r:bn_t

        mov     es, bignum_seg          ; es:di -> destination bignumber
        mov     di, word ptr r
        mov     cx, bnlength            ; cx = size of r in bytes

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     short fill_dwords       ; yes, store 32 bits at a time
ENDIF

IFDEF BIG16
        xor     ax, ax                  ; fill pattern = 0
        shr     cx, 1                   ; bytes -> words
        rep     stosw                   ; zero r one word per step
ENDIF

IFDEF BIG16AND32
        jmp     done
ENDIF

IFDEF BIG32
fill_dwords:
.386
        xor     eax, eax                ; fill pattern = 0
        shr     cx, 2                   ; bytes -> dwords
        rep     stosd                   ; zero r one dword per step
ENDIF

done:
.8086
        mov     ax, word ptr r          ; hand r back to the caller
        ret

clear_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; max_bn - r = max positive value
; Sets every bit of the bnlength bytes at bignum_seg:r, then clears the
; top bit of the last byte (the sign bit), leaving 7Fh,FFh,...,FFh.
; RETURNS: the near offset r in ax
max_bn   PROC USES di, r:bn_t

        mov     es, bignum_seg          ; es:di -> destination bignumber
        mov     di, word ptr r
        mov     cx, bnlength            ; cx = size of r in bytes

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     short fill_dwords       ; yes, store 32 bits at a time
ENDIF

IFDEF BIG16
        mov     ax, -1                  ; fill pattern = all ones
        shr     cx, 1                   ; bytes -> words
        rep     stosw                   ; saturate r one word per step
ENDIF

IFDEF BIG16AND32
        jmp     fix_sign
ENDIF

IFDEF BIG32
fill_dwords:
.386
        mov     eax, -1                 ; fill pattern = all ones
        shr     cx, 2                   ; bytes -> dwords
        rep     stosd                   ; saturate r one dword per step
ENDIF

fix_sign:
.8086
        ; stos has advanced di to one byte past the end of r,
        ; so es:[di-1] is the last byte, which holds the sign bit
        mov     byte ptr es:[di-1], 7Fh       ; sign bit off -> positive

        mov     ax, word ptr r              ; hand r back to the caller
        ret

max_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; copy_bn - r = n
; Copies bnlength bytes from bignum_seg:n to bignum_seg:r.
; ds is repointed at bignum_seg for the string move and restored on exit.
; RETURNS: the near offset r in ax
copy_bn   PROC USES di si, r:bn_t, n:bn_t

        mov     si, word ptr n          ; source offset
        mov     di, word ptr r          ; destination offset
        mov     cx, bnlength            ; byte count, read before ds is repointed
        mov     es, bignum_seg          ; es:di -> r
        mov     ax, ds                  ; ax keeps the caller's ds

IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     short move_dwords       ; yes, move 32 bits at a time
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; ds:si -> n

        shr     cx, 1                   ; bytes -> words
        rep     movsw                   ; copy one word per step
ENDIF

IFDEF BIG16AND32
        jmp     done
ENDIF

IFDEF BIG32
move_dwords:
.386
        mov     ds, bignum_seg          ; ds:si -> n

        shr     cx, 2                   ; bytes -> dwords
        rep     movsd                   ; copy one dword per step
ENDIF

done:
.8086
        mov     ds, ax                  ; caller's ds back in place
        mov     ax, word ptr r          ; hand r back to the caller
        ret

copy_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; cmp_bn - n1 != n2 ?
; Compares the two bnlength-byte numbers at bignum_seg:n1 and bignum_seg:n2,
; starting at the most significant end and working down.
; RETURNS: if n1 == n2 returns 0
;          if n1 > n2 returns a positive (bytes left to go when mismatch occurred)
;          if n1 < n2 returns a negative (bytes left to go when mismatch occurred)
; The top byte carries the sign, so a mismatch there is judged with a signed
; comparison; a mismatch in any lower byte is judged unsigned.
cmp_bn   PROC USES di, n1:bn_t, n2:bn_t

        push    ds                      ; save DS
        mov     cx, bnlength
        mov     dx, cx                  ; save bnlength for later comparison
        mov     di, word ptr n2         ; load n2 pointer in di
        mov     bx, word ptr n1         ; load n1 pointer in bx

        add     bx, cx                  ; point to end of bignumbers
        add     di, cx                  ; where the msb is

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        jae     short use_32_bit        ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
        mov     ds, bignum_seg          ; load ds
        shr     cx, 1                   ; byte = 1/2 word
top_loop_16:
        sub     bx, 2                   ; decrement to previous word
        sub     di, 2
        mov     ax, ds:[bx]             ; load n1
        cmp     ax, ds:[di]             ; compare to n2
        jne     not_match_16            ; don't match
        loop    top_loop_16
        jmp     match                   ; cx is zero
not_match_16:
        ; now determine which byte of the two did not match
        shl     cx, 1                   ; convert back to bytes
        cmp     ah, ds:[di+1]           ; compare to n2
        jne     bottom                  ; jump if ah doesn't match
        ; if ah does match, then mismatch was in al
        dec     cx                      ; decrement cx by 1 to show match
        cmp     al, ds:[di]             ; reset the flags for below
        jmp     bottom

ENDIF

IFDEF BIG32
use_32_bit:
.386
        mov     ds, bignum_seg          ; load ds
        shr     cx, 2                   ; byte = 1/4 dword
top_loop_32:
        sub     bx, 4                   ; decrement to previous dword
        sub     di, 4
        mov     eax, ds:[bx]            ; load n1
        cmp     eax, ds:[di]            ; compare to n2
        jne     not_match_32            ; don't match
        loop    top_loop_32
        jmp     match                   ; cx is zero
not_match_32:
        ; now determine which byte of the four did not match
        shl     cx, 2                   ; convert back to bytes
        mov     ebx, eax                ; bx is no longer needed as a pointer
        shr     ebx, 16                 ; shift ebx_high to bx
        cmp     bh, ds:[di+3]           ; compare to n2
        jne     bottom                  ; jump if bh doesn't match
        dec     cx                      ; decrement cx by 1 to show match
        cmp     bl, ds:[di+2]           ; compare to n2
        jne     bottom                  ; jump if bl doesn't match
        dec     cx                      ; decrement cx by 1 to show match
        cmp     ah, ds:[di+1]           ; compare to n2
        jne     bottom                  ; jump if ah doesn't match
        ; if bh,bl,ah do match, then mismatch was in al
        dec     cx                      ; decrement cx by 1 to show match
        cmp     al, ds:[di]             ; reset the flags for below
        jmp     bottom

ENDIF

bottom:
.8086
; flags are still set from the byte cmp that found the mismatch
; if cx == dx, then most significant part didn't match, use signed comparison
; else the decimals didn't match, use unsigned comparison
;
; The flags are parked with pushf/popf rather than lahf/sahf: jg needs the
; overflow flag, which lahf/sahf do not carry and which the cmp cx,dx below
; would otherwise overwrite.
        pushf                           ; save all results of last cmp
        cmp     cx, dx                  ; did they differ on very first cmp
        jne     not_first_step          ; no

        popf                            ; yes, bring back SF, ZF and OF
        jg      n1_bigger               ; signed comparison
        jmp     n2_bigger

not_first_step:
        popf                            ; bring back CF and ZF
        ja      n1_bigger               ; unsigned comparison

n2_bigger:
        neg     cx                      ; make it negative
n1_bigger:                              ; leave it positive
match:                                  ; leave it zero
        mov     ax, cx
        pop     ds                      ; restore DS
        ret

cmp_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; is_bn_neg - n < 0 ?
; Looks at the top bit of the last byte of the number at bignum_seg:n.
; RETURNS: 1 if that sign bit is set (negative), 0 if positive or zero
is_bn_neg   PROC n:bn_t

        ; a single byte read, so es is used and ds is left untouched
        mov     es, bignum_seg
        mov     bx, bnlength
        add     bx, word ptr n              ; es:bx -> one past the end of n
        mov     al, es:[bx-1]               ; al = byte holding the sign bit

        rol     al, 1                       ; bring the sign bit down to bit 0
        and     al, 1                       ; drop everything else
        xor     ah, ah                      ; ax = 0 or 1
        ret

is_bn_neg   ENDP
   38 
   39 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
   40 ; n != 0 ?
   41 ; RETURNS: if n != 0 returns 1
   42 ;          else returns 0
   43 is_bn_not_zero   PROC n:bn_t
   44 
   45         mov     ax, ds                  ; save DS
   46         mov     cx, bnlength
   47         mov     bx, word ptr n
   48 
   49 IFDEF BIG16AND32
   50         cmp     cpu, 386                ; check cpu
   51         jae     short use_32_bit        ; use faster 32 bit code if possible
   52 ENDIF
   53 
   54 IFDEF BIG16
   55         mov     ds, bignum_seg          ; load n pointer in ds:bx
   56         shr     cx, 1                   ; byte = 1/2 word
   57 top_loop_16:
   58         cmp     word ptr ds:[bx], 0     ; compare to n to 0
   59         jnz     bottom                  ; not zero
   60         add     bx, 2                   ; increment to next word
   61         loop    top_loop_16
   62 ENDIF
   63 
   64 IFDEF BIG16AND32
   65         jmp     bottom
   66 ENDIF
   67 
   68 IFDEF BIG32
   69 use_32_bit:
   70 .386
   71         mov     ds, bignum_seg          ; load n pointer in ds:bx
   72         shr     cx, 2                   ; byte = 1/4 dword
   73 top_loop_32:
   74         cmp     dword ptr ds:[bx], 0    ; compare to n to 0
   75         jnz     bottom                  ; not zero
   76         add     bx, 4                   ; increment to next dword
   77         loop    top_loop_32
   78         jmp     bottom
   79 ENDIF
   80 
   81 bottom:
   82 .8086
   83         mov     ds, ax                  ; restore DS
   84         ; if cx is zero, then n was zero
   85         mov     ax, cx
   86         ret
   87 
   88 is_bn_not_zero   ENDP
   89 
   90 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
   91 ; r = n1 + n2
   92 add_bn   PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
   93 
   94         mov     dx, ds                  ; save ds
   95         mov     cx, bnlength
   96         mov     di, WORD PTR r
   97         mov     si, WORD PTR n1
   98         mov     bx, WORD PTR n2
   99 
  100 
  101 IFDEF BIG16AND32
  102         cmp     cpu, 386                ; check cpu
  103         jae     short use_32_bit        ; use faster 32 bit code if possible
  104 ENDIF
  105 
  106 IFDEF BIG16
  107         mov     ds, bignum_seg          ; load ds
  108 
  109         shr     cx, 1                   ; byte = 1/2 word
  110         clc                             ; clear carry flag
  111 
  112 top_loop_16:
  113         mov     ax, ds:[si]             ; n1
  114         adc     ax, ds:[bx]             ; n1+n2
  115         mov     ds:[di], ax             ; r = n1+n2
  116 
  117                                         ; inc does not change carry flag
  118         inc     di                      ; add  di, 2
  119         inc     di
  120         inc     si                      ; add  si, 2
  121         inc     si
  122         inc     bx                      ; add  bx, 2
  123         inc     bx
  124 
  125         loop    top_loop_16
  126 
  127 ENDIF
  128 
  129 IFDEF BIG16AND32
  130         jmp     short bottom
  131 ENDIF
  132 
  133 IFDEF BIG32
  134 use_32_bit:
  135 .386
  136         mov     ds, bignum_seg          ; load ds
  137 
  138         shr     cx, 2                   ; byte = 1/4 double word
  139         clc                             ; clear carry flag
  140 
  141 top_loop_32:
  142         mov     eax, ds:[si]            ; n1
  143         adc     eax, ds:[bx]            ; n1+n2
  144         mov     ds:[di], eax            ; r = n1+n2
  145 
  146         lahf                            ; save carry flag
  147         add     di, 4                   ; increment by double word size
  148         add     si, 4
  149         add     bx, 4
  150         sahf                            ; restore carry flag
  151 
  152         loop    top_loop_32
  153 ENDIF
  154 
  155 bottom:
  156 .8086
  157         mov     ds, dx                  ; restore ds
  158         mov     ax, word ptr r          ; return r in ax
  159         ret
  160 add_bn   ENDP
  161 
  162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  163 ; r += n
  164 add_a_bn   PROC USES di, r:bn_t, n:bn_t
  165 
  166         mov     dx, ds                  ; save ds
  167         mov     cx, bnlength
  168         mov     di, WORD PTR r
  169         mov     bx, WORD PTR n
  170 
  171 IFDEF BIG16AND32
  172         cmp     cpu, 386                ; check cpu
  173         jae     short use_32_bit        ; use faster 32 bit code if possible
  174 ENDIF
  175 
  176 IFDEF BIG16
  177         mov     ds, bignum_seg          ; load ds
  178 
  179         shr     cx, 1                   ; byte = 1/2 word
  180         clc                             ; clear carry flag
  181 
  182 top_loop_16:
  183         mov     ax, ds:[bx]             ; n
  184         adc     ds:[di], ax             ; r += n
  185 
  186                                         ; inc does not change carry flag
  187         inc     di                      ; add  di, 2
  188         inc     di
  189         inc     bx                      ; add  di, 2
  190         inc     bx
  191 
  192         loop    top_loop_16
  193 ENDIF
  194 
  195 IFDEF BIG16AND32
  196         jmp     short bottom
  197 ENDIF
  198 
  199 IFDEF BIG32
  200 use_32_bit:
  201 .386
  202         mov     ds, bignum_seg          ; load ds
  203 
  204         shr     cx, 2                   ; byte = 1/4 double word
  205         clc                             ; clear carry flag
  206 
  207 top_loop_32:
  208         mov     eax, ds:[bx]            ; n
  209         adc     ds:[di], eax            ; r += n
  210 
  211         lahf                            ; save carry flag
  212         add     di, 4                   ; increment by double word size
  213         add     bx, 4
  214         sahf                            ; restore carry flag
  215 
  216         loop    top_loop_32
  217 ENDIF
  218 
  219 bottom:
  220 .8086
  221         mov     ds, dx                  ; restore ds
  222         mov     ax, word ptr r          ; return r in ax
  223         ret
  224 add_a_bn   ENDP
  225 
  226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  227 ; r = n1 - n2
  228 sub_bn   PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
  229 
  230         mov     dx, ds                  ; save ds
  231         mov     cx, bnlength
  232         mov     di, WORD PTR r
  233         mov     si, WORD PTR n1
  234         mov     bx, WORD PTR n2
  235 
  236 
  237 IFDEF BIG16AND32
  238         cmp     cpu, 386                ; check cpu
  239         jae     short use_32_bit        ; use faster 32 bit code if possible
  240 ENDIF
  241 
  242 IFDEF BIG16
  243         mov     ds, bignum_seg          ; load ds
  244 
  245         shr     cx, 1                   ; byte = 1/2 word
  246         clc                             ; clear carry flag
  247 
  248 top_loop_16:
  249         mov     ax, ds:[si]             ; n1
  250         sbb     ax, ds:[bx]             ; n1-n2
  251         mov     ds:[di], ax             ; r = n1-n2
  252 
  253                                         ; inc does not change carry flag
  254         inc     di                      ; add  di, 2
  255         inc     di
  256         inc     si                      ; add  si, 2
  257         inc     si
  258         inc     bx                      ; add  bx, 2
  259         inc     bx
  260 
  261         loop    top_loop_16
  262 ENDIF
  263 
  264 IFDEF BIG16AND32
  265         jmp     short bottom
  266 ENDIF
  267 
  268 IFDEF BIG32
  269 use_32_bit:
  270 .386
  271         mov     ds, bignum_seg          ; load ds
  272 
  273         shr     cx, 2                   ; byte = 1/4 double word
  274         clc                             ; clear carry flag
  275 
  276 top_loop_32:
  277         mov     eax, ds:[si]            ; n1
  278         sbb     eax, ds:[bx]            ; n1-n2
  279         mov     ds:[di], eax            ; r = n1-n2
  280 
  281         lahf                            ; save carry flag
  282         add     di, 4                   ; increment by double word size
  283         add     si, 4
  284         add     bx, 4
  285         sahf                            ; restore carry flag
  286 
  287         loop    top_loop_32
  288 ENDIF
  289 
  290 bottom:
  291 .8086
  292 
  293         mov     ds, dx                  ; restore ds
  294         mov     ax, word ptr r          ; return r in ax
  295         ret
  296 sub_bn   ENDP
  297 
  298 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  299 ; r -= n
  300 sub_a_bn   PROC USES di, r:bn_t, n:bn_t
  301 
  302         mov     dx, ds                  ; save ds
  303         mov     cx, bnlength
  304         mov     di, WORD PTR r
  305         mov     bx, WORD PTR n
  306 
  307 IFDEF BIG16AND32
  308         cmp     cpu, 386                ; check cpu
  309         jae     short use_32_bit        ; use faster 32 bit code if possible
  310 ENDIF
  311 
  312 IFDEF BIG16
  313         mov     ds, bignum_seg          ; load ds
  314 
  315         shr     cx, 1                   ; byte = 1/2 word
  316         clc                             ; clear carry flag
  317 
  318 top_loop_16:
  319         mov     ax, ds:[bx]             ; n
  320         sbb     ds:[di], ax             ; r -= n
  321 
  322                                         ; inc does not change carry flag
  323         inc     di                      ; add  di, 2
  324         inc     di
  325         inc     bx                      ; add  di, 2
  326         inc     bx
  327 
  328         loop    top_loop_16
  329 ENDIF
  330 
  331 IFDEF BIG16AND32
  332         jmp     short bottom
  333 ENDIF
  334 
  335 IFDEF BIG32
  336 use_32_bit:
  337 .386
  338         mov     ds, bignum_seg          ; load ds
  339 
  340         shr     cx, 2                   ; byte = 1/4 double word
  341         clc                             ; clear carry flag
  342 
  343 top_loop_32:
  344         mov     eax, ds:[bx]            ; n
  345         sbb     ds:[di], eax            ; r -= n
  346 
  347         lahf                            ; save carry flag
  348         add     di, 4                   ; increment by double word size
  349         add     bx, 4
  350         sahf                            ; restore carry flag
  351 
  352         loop    top_loop_32
  353 
  354 ENDIF
  355 
  356 bottom:
  357 .8086
  358 
  359         mov     ds, dx                  ; restore ds
  360         mov     ax, word ptr r          ; return r in ax
  361         ret
  362 sub_a_bn   ENDP
  363 
  364 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  365 ; r = -n
  366 neg_bn   PROC USES di, r:bn_t, n:bn_t
  367 
  368         mov     dx, ds                  ; save ds
  369         mov     cx, bnlength
  370         mov     di, WORD PTR r
  371         mov     bx, WORD PTR n
  372 
  373 IFDEF BIG16AND32
  374         cmp     cpu, 386
  375         jae     short use_32_bit        ; use faster 32 bit code if possible
  376 ENDIF
  377 
  378 IFDEF BIG16
  379         mov     ds, bignum_seg          ; load ds
  380 
  381         shr     cx, 1                   ; byte = 1/2 word
  382 
  383 top_loop_16:
  384         mov     ax, ds:[bx]
  385         neg     ax
  386         mov     ds:[di], ax
  387         jc      short no_more_carry_16  ; notice the "reverse" logic here
  388 
  389         add     di, 2                   ; increment by word size
  390         add     bx, 2
  391 
  392         loop    top_loop_16
  393         jmp     short bottom
  394 
  395 no_more_carry_16:
  396         add     di, 2
  397         add     bx, 2
  398         loop    top_loop_no_more_carry_16   ; jump down
  399         jmp     short bottom
  400 
  401 top_loop_no_more_carry_16:
  402         mov     ax, ds:[bx]
  403         not     ax
  404         mov     ds:[di], ax
  405 
  406         add     di, 2
  407         add     bx, 2
  408 
  409         loop    top_loop_no_more_carry_16
  410 ENDIF
  411 
  412 IFDEF BIG16AND32
  413         jmp     short bottom
  414 ENDIF
  415 
  416 IFDEF BIG32
  417 use_32_bit:
  418 .386
  419         mov     ds, bignum_seg          ; load ds
  420 
  421         shr     cx, 2                   ; byte = 1/4 dword
  422 
  423 top_loop_32:
  424         mov     eax, ds:[bx]
  425         neg     eax
  426         mov     ds:[di], eax
  427         jc      short no_more_carry_32   ; notice the "reverse" logic here
  428 
  429         add     di, 4                   ; increment by double word size
  430         add     bx, 4
  431 
  432         loop    top_loop_32
  433         jmp     short bottom
  434 
  435 no_more_carry_32:
  436         add     di, 4                   ; increment by double word size
  437         add     bx, 4
  438         loop    top_loop_no_more_carry_32   ; jump down
  439         jmp     short bottom
  440 
  441 top_loop_no_more_carry_32:
  442         mov     eax, ds:[bx]
  443         not     eax
  444         mov     ds:[di], eax
  445 
  446         add     di, 4                   ; increment by double word size
  447         add     bx, 4
  448 
  449         loop    top_loop_no_more_carry_32
  450 ENDIF
  451 
  452 bottom:
  453 .8086
  454 
  455         mov     ds, dx                  ; restore ds
  456         mov     ax, word ptr r          ; return r in ax
  457         ret
  458 neg_bn   ENDP
  459 
  460 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  461 ; r *= -1
  462 neg_a_bn   PROC r:bn_t
  463 
  464         mov     ax, ds                  ; save ds
  465         mov     cx, bnlength
  466         mov     bx, WORD PTR r
  467 
  468 IFDEF BIG16AND32
  469         cmp     cpu, 386
  470         jae     short use_32_bit        ; use faster 32 bit code if possible
  471 ENDIF
  472 
  473 IFDEF BIG16
  474         mov     ds, bignum_seg          ; load ds
  475         shr     cx, 1                   ; byte = 1/2 word
  476 
  477 top_loop_16:
  478         neg     word ptr ds:[bx]
  479         jc      short no_more_carry_16  ; notice the "reverse" logic here
  480 
  481         add     bx, 2
  482 
  483         loop    top_loop_16
  484         jmp     short bottom
  485 
  486 no_more_carry_16:
  487         add     bx, 2
  488         loop    top_loop_no_more_carry_16   ; jump down
  489         jmp     short bottom
  490 
  491 top_loop_no_more_carry_16:
  492         not     word ptr ds:[bx]
  493 
  494         add     bx, 2
  495 
  496         loop    top_loop_no_more_carry_16
  497 ENDIF
  498 
  499 IFDEF BIG16AND32
  500         jmp     short bottom
  501 ENDIF
  502 
  503 IFDEF BIG32
  504 use_32_bit:
  505 .386
  506         mov     ds, bignum_seg          ; load ds
  507         shr     cx, 2                   ; byte = 1/4 dword
  508 
  509 top_loop_32:
  510         neg     dword ptr ds:[bx]
  511         jc      short no_more_carry_32   ; notice the "reverse" logic here
  512 
  513         add     bx, 4
  514 
  515         loop    top_loop_32
  516         jmp     short bottom
  517 
  518 no_more_carry_32:
  519         add     bx, 4
  520         loop    top_loop_no_more_carry_32   ; jump down
  521         jmp     short bottom
  522 
  523 top_loop_no_more_carry_32:
  524         not     dword ptr ds:[bx]
  525 
  526         add     bx, 4
  527 
  528         loop    top_loop_no_more_carry_32
  529 ENDIF
  530 
  531 bottom:
  532 .8086
  533         mov     ds, ax                  ; restore ds
  534         mov     ax, word ptr r          ; return r in ax
  535         ret
  536 neg_a_bn   ENDP
  537 
  538 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  539 ; r = 2*n
  540 double_bn   PROC USES di, r:bn_t, n:bn_t
  541 
  542         mov     dx, ds                  ; save ds
  543         mov     cx, bnlength
  544         mov     di, WORD PTR r
  545         mov     bx, WORD PTR n
  546 
  547 IFDEF BIG16AND32
  548         cmp     cpu, 386
  549         jae     short use_32_bit        ; use faster 32 bit code if possible
  550 ENDIF
  551 
  552 IFDEF BIG16
  553         mov     ds, bignum_seg          ; load ds
  554 
  555         shr     cx, 1                   ; byte = 1/2 word
  556         clc
  557 
  558 top_loop_16:
  559         mov     ax, ds:[bx]
  560         rcl     ax, 1                   ; rotate with carry left
  561         mov     ds:[di], ax
  562 
  563                                         ; inc does not change carry flag
  564         inc     di                      ; add  di, 2
  565         inc     di
  566         inc     bx                      ; add bx, 2
  567         inc     bx
  568 
  569         loop    top_loop_16
  570 ENDIF
  571 
  572 IFDEF BIG16AND32
  573         jmp     short bottom
  574 ENDIF
  575 
  576 IFDEF BIG32
  577 use_32_bit:
  578 .386
  579         mov     ds, bignum_seg          ; load ds
  580 
  581         shr     cx, 2                   ; byte = 1/4 dword
  582         clc                             ; clear carry flag
  583 
  584 top_loop_32:
  585         mov     eax, ds:[bx]
  586         rcl     eax, 1                  ; rotate with carry left
  587         mov     ds:[di], eax
  588 
  589         lahf                            ; save carry flag
  590         add     di, 4                   ; increment by double word size
  591         add     bx, 4
  592         sahf                            ; restore carry flag
  593 
  594         loop    top_loop_32
  595 
  596 ENDIF
  597 bottom:
  598 .8086
  599 
  600         mov     ds, dx                  ; restore ds
  601         mov     ax, word ptr r          ; return r in ax
  602         ret
  603 double_bn   ENDP
  604 
  605 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  606 ; r *= 2
  607 double_a_bn   PROC r:bn_t
  608 
  609         mov     ax, ds                  ; save ds
  610         mov     cx, bnlength
  611         mov     bx, WORD PTR r
  612 
  613 IFDEF BIG16AND32
  614         cmp     cpu, 386
  615         jae     short use_32_bit        ; use faster 32 bit code if possible
  616 ENDIF
  617 
  618 IFDEF BIG16
  619         mov     ds, bignum_seg          ; load ds
  620 
  621         shr     cx, 1                   ; byte = 1/2 word
  622         clc
  623 
  624 top_loop_16:
  625         rcl     word ptr ds:[bx], 1     ; rotate with carry left
  626 
  627                                         ; inc does not change carry flag
  628         inc     bx                      ; add  bx, 2
  629         inc     bx
  630 
  631         loop    top_loop_16
  632 ENDIF
  633 
  634 IFDEF BIG16AND32
  635         jmp     short bottom
  636 ENDIF
  637 
  638 IFDEF BIG32
  639 use_32_bit:
  640 .386
  641         mov     ds, bignum_seg          ; load ds
  642 
  643         shr     cx, 2                   ; byte = 1/4 dword
  644         clc                             ; clear carry flag
  645 
  646 top_loop_32:
  647         rcl     dword ptr ds:[bx], 1    ; rotate with carry left
  648 
  649         inc     bx                      ; add bx, 4 but keep carry flag
  650         inc     bx
  651         inc     bx
  652         inc     bx
  653 
  654         loop    top_loop_32
  655 ENDIF
  656 
  657 bottom:
  658 .8086
  659 
  660         mov     ds, ax                  ; restore ds
  661         mov     ax, word ptr r          ; return r in ax
  662         ret
  663 double_a_bn   ENDP
  664 
  665 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  666 ; r = n/2
  667 half_bn   PROC USES di, r:bn_t, n:bn_t
  668 
  669         mov     dx, ds                  ; save ds
  670         mov     cx, bnlength
  671         mov     di, WORD PTR r
  672         mov     bx, WORD PTR n
  673 
  674         add     di, cx                  ; start with msb
  675         add     bx, cx
  676 
  677 IFDEF BIG16AND32
  678         cmp     cpu, 386
  679         jae     short use_32_bit        ; use faster 32 bit code if possible
  680 ENDIF
  681 
  682 IFDEF BIG16
  683         mov     ds, bignum_seg          ; load ds
  684 
  685         shr     cx, 1                   ; byte = 1/2 word
  686 
  687         ; handle the first step with sar, the rest with rcr
  688         sub     di, 2
  689         sub     bx, 2
  690 
  691         mov     ax, ds:[bx]
  692         sar     ax, 1                   ; shift arithmetic right
  693         mov     ds:[di], ax
  694 
  695         loop    top_loop_16
  696         jmp     short bottom
  697 
  698 
  699 top_loop_16:
  700                                         ; inc does not change carry flag
  701         dec     di                      ; sub  di, 2
  702         dec     di
  703         dec     bx                      ; sub bx, 2
  704         dec     bx
  705 
  706         mov     ax, ds:[bx]
  707         rcr     ax, 1                   ; rotate with carry right
  708         mov     ds:[di], ax
  709 
  710         loop    top_loop_16
  711 ENDIF
  712 
  713 IFDEF BIG16AND32
  714         jmp     short bottom
  715 ENDIF
  716 
  717 IFDEF BIG32
  718 use_32_bit:
  719 .386
  720         mov     ds, bignum_seg          ; load ds
  721 
  722         shr     cx, 2                   ; byte = 1/4 dword
  723 
  724         sub     di, 4                   ; decrement by double word size
  725         sub     bx, 4
  726 
  727         mov     eax, ds:[bx]
  728         sar     eax, 1                  ; shift arithmetic right
  729         mov     ds:[di], eax
  730 
  731         loop    top_loop_32
  732         jmp     short bottom
  733 
  734 top_loop_32:
  735         lahf                            ; save carry flag
  736         sub     di, 4                   ; decrement by double word size
  737         sub     bx, 4
  738         sahf                            ; restore carry flag
  739 
  740         mov     eax, ds:[bx]
  741         rcr     eax, 1                  ; rotate with carry right
  742         mov     ds:[di], eax
  743 
  744         loop    top_loop_32
  745 ENDIF
  746 
  747 bottom:
  748 .8086
  749 
  750         mov     ds, dx                  ; restore ds
  751         mov     ax, word ptr r          ; return r in ax
  752         ret
  753 half_bn   ENDP
  754 
  755 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  756 ; r /= 2
  757 half_a_bn   PROC r:bn_t
  758 
  759         mov     ax, ds                  ; save ds
  760         mov     cx, bnlength
  761         mov     bx, WORD PTR r
  762 
  763         add     bx, cx                  ; start with msb
  764 
  765 
  766 IFDEF BIG16AND32
  767         cmp     cpu, 386
  768         jae     short use_32_bit        ; use faster 32 bit code if possible
  769 ENDIF
  770 
  771 IFDEF BIG16
  772         mov     ds, bignum_seg          ; load ds
  773 
  774         shr     cx, 1                   ; byte = 1/2 word
  775 
  776         ; handle the first step with sar, the rest with rcr
  777         sub     bx, 2
  778 
  779         sar     word ptr ds:[bx], 1     ; shift arithmetic right
  780 
  781         loop    top_loop_16
  782         jmp     short bottom
  783 
  784 
  785 top_loop_16:
  786                                         ; inc does not change carry flag
  787         dec     bx                      ; sub bx, 2
  788         dec     bx
  789 
  790         rcr     word ptr ds:[bx], 1     ; rotate with carry right
  791 
  792         loop    top_loop_16
  793 ENDIF
  794 
  795 IFDEF BIG16AND32
  796         jmp     short bottom
  797 ENDIF
  798 
  799 IFDEF BIG32
  800 use_32_bit:
  801 .386
  802         mov     ds, bignum_seg          ; load ds
  803 
  804         shr     cx, 2                   ; byte = 1/4 dword
  805         sub     bx, 4                   ; decrement by double word size
  806         sar     dword ptr ds:[bx], 1    ; shift arithmetic right
  807 
  808         loop    top_loop_32
  809         jmp     short bottom
  810 
  811 top_loop_32:
  812         dec     bx                      ; sub bx, 4 but keep carry flag
  813         dec     bx
  814         dec     bx
  815         dec     bx
  816 
  817         rcr     dword ptr ds:[bx], 1       ; rotate with carry right
  818 
  819         loop    top_loop_32
  820 ENDIF
  821 
  822 bottom:
  823 .8086
  824 
  825         mov     ds, ax                  ; restore ds
  826         mov     ax, word ptr r          ; return r in ax
  827         ret
  828 half_a_bn   ENDP
  829 
  830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  831 ; r = n1 * n2
  832 ; Note: r will be a double wide result, 2*bnlength
  833 ;       n1 and n2 can be the same pointer
  834 ; SIDE-EFFECTS: n1 and n2 are changed to their absolute values
  835 ;
  836 unsafe_full_mult_bn   PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
  837 LOCAL sign1:byte, sign2:byte, samevar:byte, \
  838       i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \
  839       n1p: near ptr byte, n2p: near ptr byte
  840 
  841         push    ds                          ; save ds
  842         mov     es, bignum_seg              ; load es for when ds is a pain
  843 
  844 ; Test to see if n1 and n2 are the same variable.  It would be better to
  845 ; use square_bn(), but it could happen.
  846 
  847         mov     samevar, 0                  ; assume they are not the same
  848         mov     bx, word ptr n1
  849         cmp     bx, word ptr n2             ; compare offset
  850         jne     end_samevar_check           ; not the same
  851         mov     samevar, 1                  ; they are the same
  852 end_samevar_check:
  853 
  854 ; By forcing the bignumber to be positive and keeping track of the sign
  855 ; bits separately, quite a few multiplies are saved.
  856 
  857                                             ; check for sign bits
  858         add     bx, bnlength
  859         mov     al, es:[bx-1]
  860         and     al, 80h                     ; check the sign bit
  861         mov     sign1, al
  862         jz      already_pos1
  863         invoke  neg_a_bn, n1
  864 already_pos1:
  865 
  866         cmp     samevar, 1                  ; if it's the same variable
        je      already_pos2                ; then skip this second check
        mov     bx, word ptr n2
        add     bx, bnlength
        mov     al, es:[bx-1]
        and     al, 80h                     ; check the sign bit
        mov     sign2, al
        jz      already_pos2
        invoke  neg_a_bn, n2
already_pos2:

; in the following loops, the following pointers are used
;   n1p, n2p = points to the part of n1, n2 being used
;   di = points to part of doublebignumber r used in outer loop
;   si = points to part of doublebignumber r used in inner loop
;   bx = points to part of doublebignumber r for carry flag loop
; Also, since r is used more than n1p or n2p, abandon the convention of
; using ES for r.  Using DS will save a few clock cycles.

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
;        jae     use_32_bit              ; use faster 32 bit code if possible
        jb      wont_use_32bit
        jmp     use_32_bit              ; use faster 32 bit code if possible
wont_use_32bit:
ENDIF

IFDEF BIG16
        ; set variables
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 1                   ; byte = 1/2 word
        mov     steps, dx               ; save in steps
        mov     i, dx
        shl     dx, 1                   ; double steps

        ; clear r
        sub     ax, ax                  ; clear ax
        mov     cx, dx                  ; size of doublebignumber (r) in words
        mov     di, word ptr r          ; load r in es:di for stos
        rep     stosw                   ; initialize r to 0

        sub     dx, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx

        ; prepare segments and offsets for loops
        mov     di, word ptr r
        mov     si, di                  ; both si and di are used here
        mov     ds, bignum_seg          ; load ds
        mov     ax, word ptr n1         ; load pointers
        mov     n1p, ax
        ; use ds for all pointers

top_outer_loop_16:
        mov     ax, word ptr n2         ; set n2p pointer
        mov     n2p, ax
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_16:
        mov     bx, n1p
        mov     ax, ds:[bx]
        mov     bx, n2p
        mul     word ptr ds:[bx]

        mov     bx, si
        add     bx, 2                   ; increase by size of word
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_16        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of double big number
top_carry_loop_16:
        add     word ptr ds:[bx], 1     ; use add, not inc
        jnc     no_more_carry_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_16

no_more_carry_16:
        add     n2p, 2                  ; increase by word size
        add     si, 2
        dec     carry_steps             ; use one less step
        dec     j
        ja      top_inner_loop_16

        add     n1p, 2                  ; increase by word size
        add     di, 2
        mov     si, di                  ; start with si=di

        dec     doublesteps             ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax


        dec     i
        ja      top_outer_loop_16

        ; result is now r, a double wide bignumber
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        ; set variables
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 2                   ; byte = 1/4 dword
        mov     steps, dx               ; save in steps
        mov     i, dx
        shl     dx, 1                   ; double steps

        ; clear r
        sub     eax, eax                ; clear eax
        mov     cx, dx                  ; size of doublebignumber in dwords
        mov     di, word ptr r          ; load r in es:di for stos
        rep     stosd                   ; initialize r to 0

        sub     dx, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx

        ; prepare segments and offsets for loops
        mov     di, word ptr r
        mov     si, di                  ; both si and di are used here
        mov     ds, bignum_seg          ; load ds
        mov     ax, word ptr n1         ; load pointers
        mov     n1p, ax

top_outer_loop_32:
        mov     ax, word ptr n2         ; set n2p pointer
        mov     n2p, ax
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_32:
        mov     bx, n1p
        mov     eax, ds:[bx]
        mov     bx, n2p
        mul     dword ptr ds:[bx]

        mov     bx, si
        add     bx, 4                   ; increase by size of dword
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_32        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of double big number
top_carry_loop_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_32

no_more_carry_32:
        add     n2p, 4                  ; increase by dword size
        add     si, 4
        dec     carry_steps             ; use one less step
        dec     j
        ja      top_inner_loop_32

        add     n1p, 4                  ; increase by dword size
        add     di, 4
        mov     si, di                  ; start with si=di

        dec     doublesteps             ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax


        dec     i
        ja      top_outer_loop_32

        ; result is now r, a double wide bignumber
ENDIF

bottom:
.8086

        pop     ds                      ; restore ds
        cmp     samevar, 1              ; were the variable the same ones?
        je      pos_answer              ; if yes, then jump

        mov     al, sign1               ; is result + or - ?
        cmp     al, sign2               ; sign(n1) == sign(n2) ?
        je      pos_answer              ; yes
        shl     bnlength, 1             ; temporarily double bnlength
                                        ; for double wide bignumber
        invoke  neg_a_bn, r             ; does not affect ES
        shr     bnlength, 1             ; restore bnlength
pos_answer:

        mov     ax, word ptr r          ; return r in ax
        ret
unsafe_full_mult_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 * n2 calculating only the top rlength bytes
; Note: r will be of length rlength
;       bnlength < rlength <= 2*bnlength
;       n1 and n2 can be the same pointer
; SIDE-EFFECTS: n1 and n2 are changed to their absolute values
;
unsafe_mult_bn   PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
LOCAL sign1:byte, sign2:byte, samevar:byte, \
      i:word, j:word, steps:word, doublesteps:word, \
      carry_steps:word, skips:word, \
      n1p: ptr byte, n2p: ptr byte

; Signed multiply that forms only the top rlength bytes of n1 * n2.
;   In:   r      = offset of the result, rlength bytes, cleared here
;         n1, n2 = offsets of the operands, bnlength bytes each
;         all offsets are relative to bignum_seg
;   Out:  ax = offset r
;   di and si are preserved by USES, ds by the push/pop below; es is left
;   holding bignum_seg.
;
; The code needs bnlength < rlength <= 2*bnlength: the first inner loop
; runs (rlength-bnlength) limbs and must run at least once, and n2 is
; advanced by 2*bnlength-rlength bytes.
;
; Limb products that would land entirely below the kept part of the result
; are never formed.  To do that, the stack copy of the n2 argument starts
; advanced past its low limbs and is walked back one limb per outer pass
; (while skips is non-zero); after that the result pointer di is advanced
; instead.
;
; Locals:
;   sign1, sign2 = 80h if the operand was negative on entry, else 0
;                  (sign2 is only set when n1 and n2 are different offsets)
;   samevar      = 1 when n1 and n2 are the same offset
;   i, j         = outer and inner loop counters
;   steps        = inner loop length for the current outer pass
;   skips        = outer passes left in which n2 is walked back
;   doublesteps  = carry limit at the start of the current outer pass
;   carry_steps  = limbs of r that lie above the pair being added to
;   n1p, n2p     = current limb of n1 and n2
;
; Loop counters are tested with DEC followed by JNZ.  DEC does not write
; CF, so a JA at those points would be testing whatever carry an earlier
; ADD or SUB happened to leave behind.

        push    ds                          ; save ds
        mov     es, bignum_seg              ; load es for when ds is a pain

; Test to see if n1 and n2 are the same variable.  It would be better to
; use square_bn(), but it could happen.

        mov     samevar, 0                  ; assume they are not the same
        mov     bx, word ptr n1
        cmp     bx, word ptr n2             ; compare offset
        jne     end_samevar_check           ; not the same
        mov     samevar, 1                  ; they are the same
end_samevar_check:

; By forcing the bignumber to be positive and keeping track of the sign
; bits separately, quite a few multiplies are saved.

                                            ; check for sign bits
        add     bx, bnlength
        mov     al, es:[bx-1]               ; most significant byte of n1
        and     al, 80h                     ; check the sign bit
        mov     sign1, al                   ; mov leaves zf from the and intact
        jz      already_pos1
        invoke  neg_a_bn, n1
already_pos1:

        cmp     samevar, 1                  ; if it's the same variable
        je      already_pos2                ; then skip this second check
        mov     bx, word ptr n2
        add     bx, bnlength
        mov     al, es:[bx-1]               ; most significant byte of n2
        and     al, 80h                     ; check the sign bit
        mov     sign2, al                   ; mov leaves zf from the and intact
        jz      already_pos2
        invoke  neg_a_bn, n2
already_pos2:

        ; adjust n2 pointer for partial precision
        mov     ax, bnlength
        shl     ax, 1                   ; 2*bnlength
        sub     ax, rlength             ; 2*bnlength-rlength
        add     word ptr n2, ax         ; n2 = n2+2*bnlength-rlength


; in the following loops, the following pointers are used
;   n1p, n2p = points to the part of n1, n2 being used
;   di = points to part of doublebignumber used in outer loop
;   si = points to part of doublebignumber used in inner loop
;   bx = points to part of doublebignumber for carry flag loop
; Also, since r is used more than n1p or n2p, abandon the convention of
; using ES for r.  Using DS will save a few clock cycles.

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
;        jae     use_32_bit              ; use faster 32 bit code if possible
        jb      cant_use_32bit
        jmp     use_32_bit              ; use faster 32 bit code if possible
cant_use_32bit:
ENDIF

IFDEF BIG16
        ; clear r
        sub     ax, ax                  ; clear ax
        mov     cx, rlength             ; size of r in bytes
        shr     cx, 1                   ; byte = 1/2 word
        mov     di, word ptr r          ; load r in es:di for stos
        rep     stosw                   ; initialize r to 0

        ; set variables
        mov     ax, rlength             ; set steps for first loop
        sub     ax, bnlength
        shr     ax, 1                   ; byte = 1/2 word
        mov     steps, ax               ; save in steps

        mov     ax, bnlength
        shr     ax, 1                   ; byte = 1/2 word
        mov     i, ax

        sub     ax, steps
        mov     skips, ax               ; how long to skip over pointer shifts

        mov     ax, rlength             ; set steps for first loop
        shr     ax, 1                   ; byte = 1/2 word
        sub     ax, 2                   ; only rlength/2-2 steps are really needed
        mov     doublesteps, ax
        mov     carry_steps, ax

        ; prepare segments and offsets for loops
        mov     di, word ptr r
        mov     si, di                  ; both si and di are used here
        mov     ds, bignum_seg          ; load ds
        mov     ax, word ptr n1         ; load pointers
        mov     n1p, ax
        ; use ds for all pointers


top_outer_loop_16:
        mov     ax, word ptr n2         ; set n2p pointer
        mov     n2p, ax
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_16:
        mov     bx, n1p
        mov     ax, ds:[bx]
        mov     bx, n2p
        mul     word ptr ds:[bx]        ; dx:ax = n1 limb * n2 limb

        mov     bx, si
        add     bx, 2                   ; increase by size of word
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_16        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of double big number
top_carry_loop_16:
        add     word ptr ds:[bx], 1     ; use add, not inc
        jnc     no_more_carry_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_16

no_more_carry_16:
        add     n2p, 2                  ; increase by word size
        add     si, 2
        dec     carry_steps             ; use one less step
        dec     j
        jnz     top_inner_loop_16       ; zf comes from the dec just above

        add     n1p, 2                  ; increase by word size

        cmp     skips, 0
        je      type2_shifts_16
        sub     word ptr n2, 2          ; shift n2 back a word
        inc     steps                   ; one more step this time
        ; leave di and doublesteps where they are
        dec     skips                   ; keep track of how many times we've done this
        jmp     shifts_bottom_16
type2_shifts_16:
        add     di, 2                   ; shift di forward a word
        dec     doublesteps             ; reduce the carry steps needed
shifts_bottom_16:
        mov     si, di                  ; start with si=di
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        jnz     top_outer_loop_16       ; zf comes from the dec just above

        ; result is in r
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386

        ; clear r
        sub     eax, eax                ; clear eax
        mov     cx, rlength             ; size of r in bytes
        shr     cx, 2                   ; byte = 1/4 dword
        mov     di, word ptr r          ; load r in es:di for stos
        rep     stosd                   ; initialize r to 0

        ; set variables
        mov     ax, rlength             ; set steps for first loop
        sub     ax, bnlength
        shr     ax, 2                   ; byte = 1/4 dword
        mov     steps, ax               ; save in steps

        mov     ax, bnlength
        shr     ax, 2                   ; byte = 1/4 dword
        mov     i, ax

        sub     ax, steps
        mov     skips, ax               ; how long to skip over pointer shifts

        mov     ax, rlength             ; set steps for first loop
        shr     ax, 2                   ; byte = 1/4 dword
        sub     ax, 2                   ; only rlength/4-2 steps are really needed
        mov     doublesteps, ax
        mov     carry_steps, ax

        ; prepare segments and offsets for loops
        mov     di, word ptr r
        mov     si, di                  ; both si and di are used here
        mov     ds, bignum_seg          ; load ds
        mov     ax, word ptr n1         ; load pointers
        mov     n1p, ax


top_outer_loop_32:
        mov     ax, word ptr n2         ; set n2p pointer
        mov     n2p, ax
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_32:
        mov     bx, n1p
        mov     eax, ds:[bx]
        mov     bx, n2p
        mul     dword ptr ds:[bx]       ; edx:eax = n1 limb * n2 limb

        mov     bx, si
        add     bx, 4                   ; increase by size of dword
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_32        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of r
top_carry_loop_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_32

no_more_carry_32:
        add     n2p, 4                  ; increase by dword size
        add     si, 4
        dec     carry_steps             ; use one less step
        dec     j
        jnz     top_inner_loop_32       ; zf comes from the dec just above

        add     n1p, 4                  ; increase by dword size

        cmp     skips, 0
        je      type2_shifts_32
        sub     word ptr n2, 4          ; shift n2 back a dword
        inc     steps                   ; one more step this time
        ; leave di and doublesteps where they are
        dec     skips                   ; keep track of how many times we've done this
        jmp     shifts_bottom_32
type2_shifts_32:
        add     di, 4                   ; shift di forward a dword
        dec     doublesteps             ; reduce the carry steps needed
shifts_bottom_32:
        mov     si, di                  ; start with si=di
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        jnz     top_outer_loop_32       ; zf comes from the dec just above

        ; result is in r
ENDIF

bottom:
.8086
        pop     ds                      ; restore ds
        cmp     samevar, 1              ; were the variable the same ones?
        je      pos_answer              ; if yes, then jump

        mov     al, sign1               ; is result + or - ?
        cmp     al, sign2               ; sign(n1) == sign(n2) ?
        je      pos_answer              ; yes
        push    bnlength                ; save bnlength
        mov     ax, rlength
        mov     bnlength, ax            ; set bnlength = rlength
        invoke  neg_a_bn, r             ; does not affect ES
        pop     bnlength                ; restore bnlength
pos_answer:

        mov     ax, word ptr r          ; return r in ax
        ret
unsafe_mult_bn   ENDP
 1014 
 1015 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 1016 ; r = n^2
  1017 ;   because of the symmetry involved, n^2 is much faster than n*n
 1018 ;   for a bignumber of length l
 1019 ;      n*n takes l^2 multiplications
 1020 ;      n^2 takes (l^2+l)/2 multiplications
 1021 ;          which is about 1/2 n*n as l gets large
 1022 ;  uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...)
 1023 ;
 1024 ; Note: r will be a double wide result, 2*bnlength
 1025 ; SIDE-EFFECTS: n is changed to its absolute value
 1026 ;
 1027 unsafe_full_square_bn   PROC USES di si, r:bn_t, n:bn_t
 1028 LOCAL i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \
 1029       save_ds:word, \
 1030       rp1: ptr byte, rp2: ptr byte
 1031 
 1032         mov     save_ds, ds                 ; save ds
 1033         mov     es, bignum_seg              ; load es for when ds is a pain
 1034 
 1035 ; By forcing the bignumber to be positive and keeping track of the sign
 1036 ; bits separately, quite a few multiplies are saved.
 1037 
 1038                                             ; check for sign bit
 1039         mov     bx, word ptr n
 1040         add     bx, bnlength
 1041         mov     al, es:[bx-1]
 1042         and     al, 80h                     ; check the sign bit
 1043         jz      already_pos
 1044         invoke  neg_a_bn, n
 1045 already_pos:
 1046 
 1047 ; in the following loops, the following pointers are used
 1048 ;   n1p(di), n2p(si) = points to the parts of n being used (es)
 1049 ;   rp1 = points to part of doublebignumber used in outer loop  (ds)
 1050 ;   rp2 = points to part of doublebignumber used in inner loop  (ds)
 1051 ;   bx  = points to part of doublebignumber for carry flag loop (ds)
 1052 
 1053         mov     cx, bnlength            ; size of doublebignumber in words
 1054 
 1055 IFDEF BIG16AND32
 1056         cmp     cpu, 386                ; check cpu
 1057 ;        jae     use_32_bit              ; use faster 32 bit code if possible
 1058         jb      dont_use_32bit
 1059         jmp     use_32_bit              ; use faster 32 bit code if possible
 1060 dont_use_32bit:
 1061 ENDIF
 1062 
 1063 IFDEF BIG16
 1064         ; clear r
 1065         sub     ax, ax                  ; clear ax
 1066         ; 2{twice the size}*bnlength/2{bytes per word}
 1067         mov     di, word ptr r          ; load r pointer in es:di for stos
 1068         rep     stosw                   ; initialize r to 0
 1069 
 1070         ; initialize vars
 1071         mov     dx, bnlength            ; set outer loop counter
 1072         shr     dx, 1                   ; byte = 1/2 word
 1073         dec     dx                      ; don't need to do last one
        mov     i, dx                   ; loop counter
        mov     steps, dx               ; save in steps
        shl     dx, 1                   ; double steps
        sub     dx, 1                   ; only 2*s-1 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx

        ; initialize pointers
        mov     di, word ptr n
        mov     ax, word ptr r
        mov     ds, bignum_seg          ; load ds
        add     ax, 2                   ; start with second word
        mov     rp1, ax
        mov     rp2, ax                 ; start with rp2=rp1

        cmp     i, 0                    ; if bignumberlength is 2
        je      skip_middle_terms_16

top_outer_loop_16:
        mov     si, di                  ; set n2p pointer
        add     si, 2                   ; to 1 word beyond n1p(di)
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_16:
        mov     ax, ds:[di]
        mul     word ptr ds:[si]

        mov     bx, rp2
        add     bx, 2                   ; increase by size of word
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_16        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of double big number
top_carry_loop_16:
        add     word ptr ds:[bx], 1     ; use add, not inc
        jnc     no_more_carry_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_16

no_more_carry_16:
        add     si, 2                   ; increase by word size
        add     rp2, 2
        dec     carry_steps             ; use one less step
        dec     j
        ja      top_inner_loop_16

        add     di, 2                   ; increase by word size
        add     rp1, 4                  ; increase by 2*word size
        mov     ax, rp1
        mov     rp2, ax                 ; start with rp2=rp1

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     steps                   ; use one less step
        dec     i
        ja      top_outer_loop_16

        ; All the middle terms have been multiplied.  Now double it.
        mov     ds, save_ds             ; restore ds to get bnlength
        shl     bnlength, 1             ; r is a double wide bignumber
        invoke  double_a_bn, r          ; doesn't change es
 1074         shr     bnlength, 1             ; restore r
 1075 
 1076 skip_middle_terms_16:                   ; ds is not necessarily restored here
 1077 
 1078 ; Now go back and add in the squared terms.
 1079 ; In the following loops, the following pointers are used
 1080 ;   n1p(di) = points to the parts of n being used (es)
 1081 ;   rp1(si) = points to part of doublebignumber used in outer loop  (ds)
 1082 ;   bx      = points to part of doublebignumber for carry flag loop (ds)
 1083 
 1084         mov     di, word ptr n          ; load n1p pointer in di
 1085 
 1086         mov     ds, save_ds             ; restore ds to get bnlength
 1087         mov     dx, bnlength            ; set outer loop counter
 1088         shr     dx, 1                   ; 1 bytes = 1/2 word
 1089         mov     i, dx                   ; loop counter
 1090         shl     dx, 1                   ; double steps
 1091 
 1092         sub     dx, 2                   ; only 2*s-2 steps are really needed
 1093         mov     doublesteps, dx
 1094         mov     carry_steps, dx
 1095         mov     si, word ptr r          ; set rp1
 1096         mov     ds, bignum_seg          ; load ds
 1097 
 1098 
 1099 top_outer_loop_squares_16:
 1100 
 1101         mov     ax, ds:[di]
 1102         mul     ax                      ; square it
 1103 
 1104         mov     bx, si
 1105         add     bx, 2                   ; increase by size of word
 1106         add     ds:[bx-2], ax           ; add low word
 1107         adc     ds:[bx], dx             ; add high word
 1108         jnc     no_more_carry_squares_16 ; carry loop not necessary
 1109 
 1110         mov     cx, carry_steps         ; how many till end of double big number
 1111         jcxz    no_more_carry_squares_16
 1112         add     bx, 2                   ; move pointer to next word
 1113 
 1114         ; loop until no more carry or until end of double big number
 1115 top_carry_loop_squares_16:
 1116         add     word ptr ds:[bx], 1     ; use add, not inc
 1117         jnc     no_more_carry_squares_16
 1118         add     bx, 2                   ; increase by size of word
 1119         loop    top_carry_loop_squares_16
 1120 
 1121 no_more_carry_squares_16:
 1122         add     di, 2                   ; increase by word size
 1123         add     si, 4                   ; increase by 2*word size
 1124 
 1125         sub     doublesteps,2           ; reduce the carry steps needed
 1126         mov     ax, doublesteps
 1127         mov     carry_steps, ax
 1128 
 1129         dec     i
 1130         ja      top_outer_loop_squares_16
 1131 
 1132 
 1133         ; result is in r, a double wide bignumber
 1134 ENDIF
 1135 
 1136 IFDEF BIG16AND32
 1137         jmp     bottom
 1138 ENDIF
 1139 
 1140 IFDEF BIG32
 1141 use_32_bit:
 1142 .386
 1143         ; clear r
 1144         sub     eax, eax                ; clear eax
 1145         ; 2{twice the size}*bnlength/4{bytes per word}
 1146         shr     cx, 1                   ; size of doublebignumber in dwords
 1147         mov     di, word ptr r          ; load r pointer in es:di for stos
 1148         rep     stosd                   ; initialize r to 0
 1149 
 1150         ; initialize vars
 1151         mov     dx, bnlength            ; set outer loop counter
 1152         shr     dx, 2                   ; byte = 1/4 dword
 1153         dec     dx                      ; don't need to do last one
        mov     i, dx                   ; loop counter
        mov     steps, dx               ; save in steps
        shl     dx, 1                   ; double steps
        sub     dx, 1                   ; only 2*s-1 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx

        ; initialize pointers
        mov     di, word ptr n          ; load n1p pointer
        mov     ax, word ptr r
        mov     ds, bignum_seg          ; load ds

        add     ax, 4                   ; start with second dword
        mov     rp1, ax
        mov     rp2, ax                 ; start with rp2=rp1

        cmp     i, 0                    ; if bignumberlength is 4
        je      skip_middle_terms_32

top_outer_loop_32:
        mov     si, di                  ; set n2p pointer
        add     si, 4                   ; to 1 dword beyond n1p(di)
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_32:
        mov     eax, ds:[di]
        mul     dword ptr ds:[si]

        mov     bx, rp2
        add     bx, 4                   ; increase by size of dword
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_32        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of double big number
top_carry_loop_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_32

no_more_carry_32:
        add     si, 4                   ; increase by dword size
        add     rp2, 4
        dec     carry_steps             ; use one less step
        dec     j
        ja      top_inner_loop_32

        add     di, 4                   ; increase by dword size
        add     rp1, 8                  ; increase by 2*dword size
        mov     ax, rp1
        mov     rp2, ax                 ; start with rp2=rp1

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     steps                   ; use one less step
        dec     i
        ja      top_outer_loop_32

        ; All the middle terms have been multiplied.  Now double it.
        mov     ds, save_ds             ; restore ds to get bnlength
        shl     bnlength, 1             ; r is a double wide bignumber
        invoke  double_a_bn, r
        shr     bnlength, 1             ; restore r

skip_middle_terms_32:                   ; ds is not necessarily restored here

; Now go back and add in the squared terms.
; In the following loops, the following pointers are used
;   n1p(di) = points to the parts of n being used (es)
;   rp1(si) = points to part of doublebignumber used in outer loop (ds)
;   bx = points to part of doublebignumber for carry flag loop     (ds)

        mov     di, word ptr n          ; load n1p pointer in ds:di

        mov     ds, save_ds             ; restore ds to get bnlength
        mov     dx, bnlength            ; set outer loop counter
        shr     dx, 2                   ; 1 bytes = 1/4 dword
        mov     i, dx                   ; loop counter
        shl     dx, 1                   ; double steps

        sub     dx, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, dx
        mov     carry_steps, dx
        mov     si, word ptr r          ; set rp1
        mov     ds, bignum_seg          ; load ds

top_outer_loop_squares_32:

        mov     eax, ds:[di]
        mul     eax                     ; square it

        mov     bx, si
        add     bx, 4                   ; increase by size of dword
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_squares_32 ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_squares_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of double big number
top_carry_loop_squares_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_squares_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_squares_32

no_more_carry_squares_32:
        add     di, 4                   ; increase by dword size
        add     si, 8                   ; increase by 2*dword size

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        ja      top_outer_loop_squares_32


        ; result is in r, a double wide bignumber
ENDIF

bottom:
.8086

; since it is a square, the result has to already be positive

        mov     ds, save_ds             ; restore ds
        mov     ax, word ptr r          ; return r in ax
        ret
unsafe_full_square_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n^2
;   because of the symmetry involved, n^2 is much faster than n*n
;   for a bignumber of length l
;      n*n takes l^2 multiplications
;      n^2 takes (l^2+l)/2 multiplications
;          which is about 1/2 n*n as l gets large
;  uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...)
;
; Arguments:
;   r = offset (within bignum_seg) of the result, rlength bytes long
;   n = offset (within bignum_seg) of the operand, bnlength bytes long
; Returns:
;   ax = r
;
; Note: r will be of length rlength
;       2*bnlength >= rlength > bnlength
;       When rlength == 2*bnlength the work is handed to
;       unsafe_full_square_bn().
; SIDE-EFFECTS: n is changed to its absolute value
;
; Segment discipline: the globals bnlength, rlength, bignum_seg and cpu
; are read through the caller's ds, which is kept in save_ds.  While the
; multiply loops run, ds holds bignum_seg instead, so ds must be reloaded
; from save_ds before any of those globals is touched again.
;
unsafe_square_bn   PROC USES di si, r:bn_t, n:bn_t
LOCAL i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \
      skips:word, rodd:word, \
      save_ds:word, \
      n3p: ptr byte, \
      rp1: ptr byte, rp2: ptr byte

; This whole procedure would be a great deal simpler if we could assume that
; rlength < 2*bnlength (that is, not =).  Therefore, we will take the
; easy way out and call full_square_bn() if it is.
        mov     ax, rlength
        shr     ax, 1                   ; 1/2 * rlength
        cmp     ax, bnlength            ; 1/2 * rlength == bnlength?
        jne     not_full_square
        invoke  unsafe_full_square_bn, r, n
        ; ax is still loaded with the return value (r)
        jmp     quit_proc               ; we're outa here
not_full_square:

        mov     save_ds, ds
        mov     es, bignum_seg              ; load es for when ds is a pain

; By forcing the bignumber to be positive and keeping track of the sign
; bits separately, quite a few multiplies are saved.

                                            ; check for sign bit
        mov     bx, word ptr n              ; load n1 pointer in es:bx
        add     bx, bnlength
        mov     al, es:[bx-1]               ; most significant byte of n
        and     al, 80h                     ; check the sign bit
        jz      already_pos
        invoke  neg_a_bn, n
already_pos:

; in the following loops, the following pointers are used
; (everything is addressed through ds, which is loaded with bignum_seg)
;   n1p(di), n2p(si) = points to the parts of n being used
;   rp1 = points to part of doublebignumber used in outer loop
;   rp2 = points to part of doublebignumber used in inner loop
;   bx  = points to part of doublebignumber for carry flag loop

IFDEF BIG16AND32
        cmp     cpu, 386                ; check cpu
        ; "jae use_32_bit" is spelled as jb + jmp, presumably because the
        ; target is out of conditional jump range
        jb      skip_use_32bit
        jmp     use_32_bit              ; use faster 32 bit code if possible
skip_use_32bit:
ENDIF

IFDEF BIG16
        ; clear r
        sub     ax, ax                  ; clear ax
        mov     cx, rlength             ; size of rlength in bytes
        shr     cx, 1                   ; byte = 1/2 word
        mov     di, word ptr r          ; load r pointer in es:di for stos
        rep     stosw                   ; initialize r to 0


        ; initialize vars

        ; determine whether r is on an odd or even word in the number
        ; (even if rlength==2*bnlength, dec r alternates odd/even)
        mov     ax, bnlength
        shl     ax, 1                   ; double wide width
        sub     ax, rlength             ; 2*bnlength-rlength
        shr     ax, 1                   ; 1 byte = 1/2 word
        and     ax, 0001h               ; check the odd sign bit
        mov     rodd, ax

        mov     ax, bnlength            ; set outer loop counter
        shr     ax, 1                   ; byte = 1/2 word
        dec     ax                      ; don't need to do last one
        mov     i, ax                   ; loop counter

        mov     ax, rlength             ; set steps for first loop
        sub     ax, bnlength
        shr     ax, 1                   ; byte = 1/2 word
        mov     steps, ax               ; save in steps

        mov     dx, bnlength
        shr     dx, 1                   ; bnlength/2
        add     ax, dx                  ; steps+bnlength/2
        sub     ax, 2                   ; steps+bnlength/2-2
        mov     doublesteps, ax
        mov     carry_steps, ax

        mov     ax, i
        sub     ax, steps
        shr     ax, 1                   ; for both words and dwords
        mov     skips, ax               ; how long to skip over pointer shifts

        ; initialize pointers
        mov     di, word ptr n
        mov     si, di
        mov     ax, bnlength
        shr     ax, 1                   ; 1 byte = 1/2 word
        sub     ax, steps
        shl     ax, 1                   ; words back to bytes
        add     si, ax                  ; n2p = n1p + 2*(bnlength/2 - steps)
        mov     n3p, si                 ; save for later use
        mov     ax, word ptr r
        mov     ds, bignum_seg          ; load ds
        mov     rp1, ax
        mov     rp2, ax                 ; start with rp2=rp1

        cmp     i, 0                    ; if bignumberlength is 2
        ; "je skip_middle_terms_16" spelled as jne + jmp
        jne     top_outer_loop_16
        jmp     skip_middle_terms_16

top_outer_loop_16:
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_16:
        mov     ax, ds:[di]
        mul     word ptr ds:[si]

        mov     bx, rp2
        add     bx, 2                   ; increase by size of word
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_16        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of double big number
top_carry_loop_16:
        add     word ptr ds:[bx], 1     ; use add, not inc
        jnc     no_more_carry_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_16

no_more_carry_16:
        add     si, 2                   ; increase by word size
        add     rp2, 2
        dec     carry_steps             ; use one less step
        dec     j
        ja      top_inner_loop_16

        add     di, 2                   ; increase by word size

        mov     ax, rodd                ; whether r is on an odd or even word

        cmp     skips, 0
        jle     type2_shifts_16
        sub     n3p, 2                  ; point to previous word
        mov     si, n3p
        inc     steps                   ; one more step this time
        ; leave rp1 and doublesteps where they are
        dec     skips
        jmp     shifts_bottom_16
type2_shifts_16:    ; only gets executed once
        jl      type3_shifts_16         ; flags are still those of cmp skips, 0
        sub     steps, ax               ; steps -= (0 or 1)
        inc     ax                      ; ax = 1 or 2 now
        sub     doublesteps, ax         ; decrease double steps by 1 or 2
        shl     ax, 1                   ; words to bytes
        add     rp1, ax                 ; add 1 or 2 words
        mov     si, di
        add     si, 2                   ; si = di + word
        dec     skips                   ; make skips negative
        jmp     shifts_bottom_16
type3_shifts_16:
        dec     steps
        sub     doublesteps, 2
        add     rp1, 4                  ; + two words
        mov     si, di
        add     si, 2                   ; si = di + word
shifts_bottom_16:

        mov     ax, rp1
        mov     rp2, ax                 ; start with rp2=rp1

        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        ; "ja top_outer_loop_16" spelled as jna + jmp
        jna     not_top_outer_loop_16
        jmp     top_outer_loop_16
not_top_outer_loop_16:
        ; All the middle terms have been multiplied.  Now double it.
        mov     ds, save_ds             ; restore ds to get bnlength
        push    bnlength                ; save bnlength
        mov     ax, rlength
        mov     bnlength, ax            ; r is of length rlength
        invoke  double_a_bn, r
        pop     bnlength

skip_middle_terms_16:                   ; ds is not necessarily restored here
        mov     ds, save_ds             ; the jump from above arrives with
                                        ; ds = bignum_seg; bnlength, rlength
                                        ; and bignum_seg are read below

; Now go back and add in the squared terms.
; In the following loops, the following pointers are used
; (all through ds, loaded with bignum_seg just before the loop)
;   n1p(di) = points to the parts of n being used
;   rp1(si) = points to part of doublebignumber used in outer loop
;   bx = points to part of doublebignumber for carry flag loop

        ; be careful, the next dozen or so lines are confusing!

        ; determine whether r is on an odd or even word in the number
        mov     ax, bnlength
        shl     ax, 1                   ; double wide width
        sub     ax, rlength             ; 2*bnlength-rlength
        mov     dx, ax                  ; save this for a moment
        and     ax, 0002h               ; check the odd sign bit

        mov     si, word ptr r          ; load r pointer in ds:si
        add     si, ax                  ; depending on odd or even byte

        shr     dx, 1                   ; assumes word size
        inc     dx
        and     dx, 0FFFEh              ; ~2+1, turn off last bit, mult of 2
        mov     di, word ptr n          ; load n1p pointer in di
        add     di, dx

        mov     ax, bnlength
        sub     ax, dx
        shr     ax, 1                   ; 1 byte = 1/2 word
        mov     i, ax

        shl     ax, 1                   ; double steps
        sub     ax, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, ax
        mov     carry_steps, ax

        mov     ds, bignum_seg          ; load ds

top_outer_loop_squares_16:

        mov     ax, ds:[di]
        mul     ax                      ; square it

        mov     bx, si
        add     bx, 2                   ; increase by size of word
        add     ds:[bx-2], ax           ; add low word
        adc     ds:[bx], dx             ; add high word
        jnc     no_more_carry_squares_16 ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_squares_16
        add     bx, 2                   ; move pointer to next word

        ; loop until no more carry or until end of double big number
top_carry_loop_squares_16:
        add     word ptr ds:[bx], 1     ; use add, not inc
        jnc     no_more_carry_squares_16
        add     bx, 2                   ; increase by size of word
        loop    top_carry_loop_squares_16

no_more_carry_squares_16:
        add     di, 2                   ; increase by word size
        add     si, 4                   ; increase by 2*word size

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        ja      top_outer_loop_squares_16


        ; result is in r
ENDIF

IFDEF BIG16AND32
        jmp     bottom
ENDIF

IFDEF BIG32
use_32_bit:
.386
        ; clear r
        sub     eax, eax                ; clear eax
        mov     cx, rlength             ; size of rlength in bytes
        shr     cx, 2                   ; byte = 1/4 dword
        mov     di, word ptr r          ; load r pointer in es:di for stos
        rep     stosd                   ; initialize r to 0

        ; initialize vars

        ; determine whether r is on an odd or even dword in the number
        ; (even if rlength==2*bnlength, dec r alternates odd/even)
        mov     ax, bnlength
        shl     ax, 1                   ; double wide width
        sub     ax, rlength             ; 2*bnlength-rlength
        shr     ax, 2                   ; 1 byte = 1/4 dword
        and     ax, 0001h               ; check the odd sign bit
        mov     rodd, ax

        mov     ax, bnlength            ; set outer loop counter
        shr     ax, 2                   ; byte = 1/4 dword
        dec     ax                      ; don't need to do last one
        mov     i, ax                   ; loop counter

        mov     ax, rlength             ; set steps for first loop
        sub     ax, bnlength
        shr     ax, 2                   ; byte = 1/4 dword
        mov     steps, ax               ; save in steps

        mov     dx, bnlength
        shr     dx, 2                   ; bnlength/4
        add     ax, dx                  ; steps+bnlength/4
        sub     ax, 2                   ; steps+bnlength/4-2
        mov     doublesteps, ax
        mov     carry_steps, ax

        mov     ax, i
        sub     ax, steps
        shr     ax, 1                   ; for both words and dwords
        mov     skips, ax               ; how long to skip over pointer shifts

        ; initialize pointers
        mov     di, word ptr n          ; load n1p pointer
        mov     si, di
        mov     ax, bnlength
        shr     ax, 2                   ; 1 byte = 1/4 dword
        sub     ax, steps
        shl     ax, 2                   ; dwords back to bytes
        add     si, ax                  ; n2p = n1p + 4*(bnlength/4 - steps)
        mov     n3p, si                 ; save for later use
        mov     ax, word ptr r
        mov     ds, bignum_seg          ; load ds
        mov     rp1, ax
        mov     rp2, ax                 ; start with rp2=rp1

        cmp     i, 0                    ; if bignumberlength is 4 (one dword)
        je      skip_middle_terms_32

top_outer_loop_32:
        mov     ax, steps               ; set inner loop counter
        mov     j, ax

top_inner_loop_32:
        mov     eax, ds:[di]
        mul     dword ptr ds:[si]

        mov     bx, rp2
        add     bx, 4                   ; increase by size of dword
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_32        ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of double big number
top_carry_loop_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_32

no_more_carry_32:
        add     si, 4                   ; increase by dword size
        add     rp2, 4
        dec     carry_steps             ; use one less step
        dec     j
        ja      top_inner_loop_32

        add     di, 4                   ; increase by dword size

        mov     ax, rodd                ; whether r is on an odd or even dword

        cmp     skips, 0
        jle     type2_shifts_32
        sub     n3p, 4                  ; point to previous dword
        mov     si, n3p
        inc     steps                   ; one more step this time
        ; leave rp1 and doublesteps where they are
        dec     skips
        jmp     shifts_bottom_32
type2_shifts_32:    ; only gets executed once
        jl      type3_shifts_32         ; flags are still those of cmp skips, 0
        sub     steps, ax               ; steps -= (0 or 1)
        inc     ax                      ; ax = 1 or 2 now
        sub     doublesteps, ax         ; decrease double steps by 1 or 2
        shl     ax, 2                   ; dwords to bytes
        add     rp1, ax                 ; add 1 or 2 dwords
        mov     si, di
        add     si, 4                   ; si = di + dword
        dec     skips                   ; make skips negative
        jmp     shifts_bottom_32
type3_shifts_32:
        dec     steps
        sub     doublesteps, 2
        add     rp1, 8                  ; + two dwords
        mov     si, di
        add     si, 4                   ; si = di + dword
shifts_bottom_32:

        mov     ax, rp1
        mov     rp2, ax                 ; start with rp2=rp1

        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        ja      top_outer_loop_32

        ; All the middle terms have been multiplied.  Now double it.
        mov     ds, save_ds             ; restore ds to get bnlength
        push    bnlength                ; save bnlength
        mov     ax, rlength
        mov     bnlength, ax            ; r is of length rlength
        invoke  double_a_bn, r
        pop     bnlength

skip_middle_terms_32:                   ; ds is not necessarily restored here
        mov     ds, save_ds             ; the jump from above arrives with
                                        ; ds = bignum_seg; bnlength, rlength
                                        ; and bignum_seg are read below

; Now go back and add in the squared terms.
; In the following loops, the following pointers are used
; (all through ds, loaded with bignum_seg just before the loop)
;   n1p(di) = points to the parts of n being used
;   rp1(si) = points to part of doublebignumber used in outer loop
;   bx = points to part of doublebignumber for carry flag loop

        ; be careful, the next dozen or so lines are confusing!

        ; determine whether r is on an odd or even dword in the number
        mov     ax, bnlength
        shl     ax, 1                   ; double wide width
        sub     ax, rlength             ; 2*bnlength-rlength
        mov     dx, ax                  ; save this for a moment
        and     ax, 0004h               ; check the odd sign bit

        mov     si, word ptr r          ; load r pointer in ds:si
        add     si, ax                  ; depending on odd or even byte

        shr     dx, 2                   ; assumes dword size
        inc     dx
        and     dx, 0FFFEh              ; ~2+1, turn off last bit, mult of 2
        shl     dx, 1
        mov     di, word ptr n          ; load n1p pointer in di
        add     di, dx

        mov     ax, bnlength
        sub     ax, dx
        shr     ax, 2                   ; 1 byte = 1/4 dword
        mov     i, ax

        shl     ax, 1                   ; double steps
        sub     ax, 2                   ; only 2*s-2 steps are really needed
        mov     doublesteps, ax
        mov     carry_steps, ax

        mov     ds, bignum_seg          ; load ds

top_outer_loop_squares_32:

        mov     eax, ds:[di]
        mul     eax                     ; square it

        mov     bx, si
        add     bx, 4                   ; increase by size of dword
        add     ds:[bx-4], eax          ; add low dword
        adc     ds:[bx], edx            ; add high dword
        jnc     no_more_carry_squares_32 ; carry loop not necessary

        mov     cx, carry_steps         ; how many till end of double big number
        jcxz    no_more_carry_squares_32
        add     bx, 4                   ; move pointer to next dword

        ; loop until no more carry or until end of double big number
top_carry_loop_squares_32:
        add     dword ptr ds:[bx], 1    ; use add, not inc
        jnc     no_more_carry_squares_32
        add     bx, 4                   ; increase by size of dword
        loop    top_carry_loop_squares_32

no_more_carry_squares_32:
        add     di, 4                   ; increase by dword size
        add     si, 8                   ; increase by 2*dword size

        sub     doublesteps,2           ; reduce the carry steps needed
        mov     ax, doublesteps
        mov     carry_steps, ax

        dec     i
        ja      top_outer_loop_squares_32


        ; result is in r
ENDIF

bottom:
.8086

; since it is a square, the result has to already be positive

        mov     ds, save_ds             ; restore ds
        mov     ax, word ptr r          ; return r in ax

quit_proc:
        ret
unsafe_square_bn   ENDP
 1411 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; mult_bn_int: r = n * u  where u is an unsigned integer
;   r, n   offsets of bignumbers inside bignum_seg, bnlength bytes each
;   u      unsigned word multiplier
;   returns r in ax
; The product is built from the lowest address upward; the high half of
; each partial product is carried into the next step.  The carry left
; over after the last step is not stored anywhere.
mult_bn_int   PROC USES di si, r:bn_t, n:bn_t, u:word
LOCAL   lu:dword  ; u widened to a dword for the 32 bit multiply

        push    ds                      ; caller's ds, put back at the end
        mov     si, word ptr n          ; si walks through n
        mov     di, word ptr r          ; di walks through r
        mov     cx, bnlength            ; length in bytes, scaled below


IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     mul_by_dwords           ;   then take the 32 bit path
ENDIF

IFDEF BIG16
        ; r does not need clearing, every word of it is overwritten

        xor     bx, bx                  ; bx = carry word, none yet
        shr     cx, 1                   ; bytes -> words
        mov     ds, bignum_seg          ; ds:si / ds:di address the bignumbers

mul_next_word:
        mov     ax, ds:[si]             ; next word of n
        mul     u                       ; dx:ax = word * u
        add     ax, bx                  ; bring in carry from the word below
        adc     dx, 0                   ; which may ripple into the high word
        mov     ds:[di], ax             ; low word goes to r
        mov     bx, dx                  ; high word is the next carry

        add     si, 2                   ; step n
        add     di, 2                   ; step r
        loop    mul_next_word
ENDIF

IFDEF BIG16AND32
        jmp     mul_done
ENDIF

IFDEF BIG32
mul_by_dwords:
.386
        ; r does not need clearing, every dword of it is overwritten

        movzx   eax, u                  ; zero extend u (unsigned int)
        mov     lu, eax                 ;   into lu (long unsigned int)

        xor     ebx, ebx                ; ebx = carry dword, none yet
        shr     cx, 2                   ; bytes -> dwords
        mov     ds, bignum_seg          ; ds:si / ds:di address the bignumbers

mul_next_dword:
        mov     eax, ds:[si]            ; next dword of n
        mul     lu                      ; edx:eax = dword * lu
        add     eax, ebx                ; bring in carry from the dword below
        adc     edx, 0                  ; which may ripple into the high dword
        mov     ds:[di], eax            ; low dword goes to r
        mov     ebx, edx                ; high dword is the next carry

        add     si, 4                   ; step n
        add     di, 4                   ; step r
        loop    mul_next_dword
ENDIF

mul_done:
.8086

        pop     ds                      ; back to the caller's ds
        mov     ax, word ptr r          ; return r in ax
        ret
mult_bn_int   ENDP
 1487 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; mult_a_bn_int: r *= u  where u is an unsigned integer
;   r      offset of the bignumber inside bignum_seg, bnlength bytes
;   u      unsigned word multiplier
;   returns r in ax
; r is multiplied in place from the lowest address upward; the high half
; of each partial product is carried into the next step.  The carry left
; over after the last step is not stored anywhere.
mult_a_bn_int   PROC USES di si, r:bn_t, u:word

        push    ds                      ; caller's ds, put back at the end
        mov     si, word ptr r          ; si walks through r
        mov     cx, bnlength            ; length in bytes, scaled below


IFDEF BIG16AND32
        cmp     cpu, 386                ; 386 or better?
        jae     mul_by_dwords           ;   then take the 32 bit path
ENDIF

IFDEF BIG16
        ; r is updated word by word, nothing to clear
        mov     di, u                   ; keep the multiplier in di
        xor     bx, bx                  ; bx = carry word, none yet
        shr     cx, 1                   ; bytes -> words
        mov     ds, bignum_seg          ; ds:si addresses the bignumber

mul_next_word:
        mov     ax, ds:[si]             ; next word of r
        mul     di                      ; dx:ax = word * u
        add     ax, bx                  ; bring in carry from the word below
        adc     dx, 0                   ; which may ripple into the high word
        mov     ds:[si], ax             ; low word goes back into r
        mov     bx, dx                  ; high word is the next carry

        add     si, 2                   ; step r
        loop    mul_next_word
ENDIF

IFDEF BIG16AND32
        jmp     mul_done
ENDIF

IFDEF BIG32
mul_by_dwords:
.386
        ; r is updated dword by dword, nothing to clear
        movzx   edi, u                  ; multiplier in edi, upper half zero
        xor     ebx, ebx                ; ebx = carry dword, none yet
        shr     cx, 2                   ; bytes -> dwords
        mov     ds, bignum_seg          ; ds:si addresses the bignumber

mul_next_dword:
        mov     eax, ds:[si]            ; next dword of r
        mul     edi                     ; edx:eax = dword * u
        add     eax, ebx                ; bring in carry from the dword below
        adc     edx, 0                  ; which may ripple into the high dword
        mov     ds:[si], eax            ; low dword goes back into r
        mov     ebx, edx                ; high dword is the next carry

        add     si, 4                   ; step r
        loop    mul_next_dword
ENDIF

mul_done:
.8086

        pop     ds                      ; back to the caller's ds
        mov     ax, word ptr r          ; return r in ax
        ret
mult_a_bn_int   ENDP
 1554 
 1555 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 1556 ; r = n / u  where u is an unsigned integer
 1557 unsafe_div_bn_int   PROC USES di si, r:bn_t, n:bn_t, u:word
 1558 LOCAL  sign:byte
; Long division of the bnlength-byte number at n by the 16 bit unsigned
; divisor u, working from the most significant digit down.  The quotient
; is written to r and the offset r is returned in ax.
; The sign bit of n is saved in `sign`; when it is set, neg_a_bn is called
; on n before dividing and on r afterwards.  n is not negated back, which
; is presumably what makes this the "unsafe" variant.
; u = 0 is not checked for; div would raise a divide error.
 1559 
 1560         push    ds                          ; ds is repointed at bignum_seg below
 1561                                             ; record the sign of n
 1562         mov     es, bignum_seg              ; es = segment holding the big numbers
 1563         mov     bx, WORD PTR n
 1564         add     bx, bnlength                ; es:bx = one byte past the end of n
 1565         mov     al, es:[bx-1]               ; al = most significant byte of n
 1566         and     al, 80h                     ; isolate the sign bit, sets ZF
 1567         mov     sign, al                    ; mov leaves the flags alone
 1568         jz      n_is_positive
 1569         invoke  neg_a_bn, n                 ; divide the magnitude instead
 1570 n_is_positive:
 1571 
 1572         mov     cx, bnlength                ; byte count, scaled to digits below
 1573         mov     si, word ptr n              ; si = offset of the dividend
 1574         mov     di, word ptr r              ; di = offset of the quotient
 1575         ; step both offsets past the most significant end
 1576         add     di, cx
 1577         add     si, cx
 1578 
 1579 IFDEF BIG16AND32
 1580         cmp     cpu, 386                ; 386 or better?
 1581         jae     div_by_dwords           ; then divide 32 bits per step
 1582 ENDIF
 1583 
 1584 IFDEF BIG16
 1585         mov     ds, bignum_seg          ; ds:si = n, ds:di = r from here on
 1586 
 1587         ; every word of r is overwritten, so r needs no clearing first
 1588         mov     bx, u                   ; bx = divisor
 1589         shr     cx, 1                   ; cx = number of words
 1590 
 1591         ; back up onto the most significant word
 1592         sub     di, 2
 1593         sub     si, 2
 1594 
 1595         xor     dx, dx                  ; nothing is carried into the
 1596                                         ; first division
 1597 next_word:
 1598         mov     ax, ds:[si]             ; dx:ax = remainder : next word of n
 1599         div     bx                      ; ax = quotient word, dx = remainder
 1600         mov     ds:[di], ax             ; quotient word goes to r
 1601                                         ; dx carries into the next step
 1602 
 1603         sub     di, 2                   ; down one word in r
 1604         sub     si, 2                   ; down one word in n
 1605         loop    next_word
 1606 ENDIF
 1607 
 1608 IFDEF BIG16AND32
 1609         jmp     division_done
 1610 ENDIF
 1611 
 1612 IFDEF BIG32
 1613 div_by_dwords:
 1614 .386
 1615         mov     ds, bignum_seg          ; ds:si = n, ds:di = r from here on
 1616 
 1617         ; every dword of r is overwritten, so r needs no clearing first
 1618         shr     cx, 2                   ; cx = number of dwords
 1619         xor     ebx, ebx                ; zero the upper word of ebx
 1620         mov     bx, u                   ; ebx = divisor, zero extended
 1621 
 1622         ; back up onto the most significant dword
 1623         sub     di, 4
 1624         sub     si, 4
 1625 
 1626         xor     edx, edx                ; nothing is carried into the
 1627                                         ; first division
 1628 next_dword:
 1629         mov     eax, ds:[si]            ; edx:eax = remainder : next dword of n
 1630         div     ebx                     ; eax = quotient dword, edx = remainder
 1631         mov     ds:[di], eax            ; quotient dword goes to r
 1632                                         ; edx carries into the next step
 1633 
 1634         sub     di, 4                   ; down one dword in r
 1635         sub     si, 4                   ; down one dword in n
 1636         loop    next_dword
 1637 ENDIF
 1638 
 1639 division_done:
 1640 .8086
 1641 
 1642         pop     ds                      ; back to the caller's ds
 1643 
 1644         cmp     sign, 0                 ; was n negative on entry?
 1645         je      sign_restored           ; no, the quotient stands as is
 1646         invoke  neg_a_bn, r             ; yes, give the quotient that sign
 1647 sign_restored:
 1648 
 1649         mov     ax, word ptr r          ; hand r back in ax
 1650         ret
 1651 unsafe_div_bn_int   ENDP
 1652 
 1653 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 1654 ; r /= u  where u is an unsigned integer
 1655 div_a_bn_int   PROC USES si, r:bn_t, u:word
 1656 LOCAL  sign:byte
; In-place long division: the bnlength-byte number at r is divided by the
; 16 bit unsigned divisor u, most significant digit first, and each
; quotient digit overwrites the digit it came from.  Returns r in ax.
; The sign bit of r is saved in `sign`; when it is set, neg_a_bn is called
; on r before dividing and again afterwards.
; u = 0 is not checked for; div would raise a divide error.
 1657 
 1658         push    ds                          ; ds is repointed at bignum_seg below
 1659 
 1660         mov     es, bignum_seg              ; es = segment holding the big numbers
 1661         mov     bx, WORD PTR r
 1662         add     bx, bnlength                ; es:bx = one byte past the end of r
 1663         mov     al, es:[bx-1]               ; al = most significant byte of r
 1664         and     al, 80h                     ; isolate the sign bit, sets ZF
 1665         mov     sign, al                    ; mov leaves the flags alone
 1666         jz      r_is_positive
 1667         invoke  neg_a_bn, r                 ; divide the magnitude instead
 1668 r_is_positive:
 1669 
 1670         mov     si, WORD PTR r          ; si = offset of r
 1671         mov     cx, bnlength            ; byte count, scaled to digits below
 1672         ; step past the most significant end
 1673         add     si, cx
 1674 
 1675 
 1676 IFDEF BIG16AND32
 1677         cmp     cpu, 386                ; 386 or better?
 1678         jae     div_by_dwords           ; then divide 32 bits per step
 1679 ENDIF
 1680 
 1681 IFDEF BIG16
 1682         mov     ds, bignum_seg          ; ds:si = r from here on
 1683 
 1684         ; each word is read before it is overwritten
 1685         mov     bx, u                   ; bx = divisor
 1686         shr     cx, 1                   ; cx = number of words
 1687 
 1688         ; back up onto the most significant word
 1689         sub     si, 2
 1690 
 1691         xor     dx, dx                  ; nothing is carried into the
 1692                                         ; first division
 1693 next_word:
 1694         mov     ax, ds:[si]             ; dx:ax = remainder : next word of r
 1695         div     bx                      ; ax = quotient word, dx = remainder
 1696         mov     ds:[si], ax             ; quotient word replaces the original
 1697                                         ; dx carries into the next step
 1698 
 1699         sub     si, 2                   ; down one word in r
 1700         loop    next_word
 1701 ENDIF
 1702 
 1703 IFDEF BIG16AND32
 1704         jmp     division_done
 1705 ENDIF
 1706 
 1707 IFDEF BIG32
 1708 div_by_dwords:
 1709 .386
 1710         mov     ds, bignum_seg          ; ds:si = r from here on
 1711 
 1712         ; each dword is read before it is overwritten
 1713         shr     cx, 2                   ; cx = number of dwords
 1714         xor     ebx, ebx                ; zero the upper word of ebx
 1715         mov     bx, u                   ; ebx = divisor, zero extended
 1716 
 1717         ; back up onto the most significant dword
 1718         sub     si, 4
 1719 
 1720         xor     edx, edx                ; nothing is carried into the
 1721                                         ; first division
 1722 next_dword:
 1723         mov     eax, ds:[si]            ; edx:eax = remainder : next dword of r
 1724         div     ebx                     ; eax = quotient dword, edx = remainder
 1725         mov     ds:[si], eax            ; quotient dword replaces the original
 1726                                         ; edx carries into the next step
 1727 
 1728         sub     si, 4                   ; down one dword in r
 1729         loop    next_dword
 1730 ENDIF
 1731 
 1732 division_done:
 1733 .8086
 1734         pop     ds                      ; back to the caller's ds
 1735 
 1736         cmp     sign, 0                 ; was r negative on entry?
 1737         je      sign_restored           ; no, the quotient stands as is
 1738         invoke  neg_a_bn, r             ; yes, give the quotient that sign
 1739 sign_restored:
 1740 
 1741         mov     ax, word ptr r          ; hand r back in ax
 1742         ret
 1743 div_a_bn_int   ENDP
 1744 
 1745 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 1746 ; bf_t routines
 1747 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 1748 ; r = 0    (just like clear_bn() but clears bflength bytes plus the 2 byte exponent instead of bnlength bytes)
 1749 clear_bf   PROC USES di, r:bf_t
; Zero-fills the bf number at r: bflength bytes followed by one more word
; (the exponent).  Returns the offset r in ax.
; The string stores run upward only if the direction flag is clear, which
; this routine assumes and does not set itself.
 1750 
 1751         mov     es, bignum_seg          ; es:di will address r
 1752         mov     di, word ptr r
 1753         mov     cx, bflength            ; byte count, scaled below
 1754 
 1755 IFDEF BIG16AND32
 1756         cmp     cpu, 386                ; 386 or better?
 1757         jae     short fill_dwords       ; then store 32 bits at a time
 1758 ENDIF
 1759 
 1760 IFDEF BIG16
 1761         xor     ax, ax                  ; the fill value
 1762         shr     cx, 1                   ; bytes -> words
 1763         inc     cx                      ; one more word for the exponent
 1764         rep     stosw                   ; zero everything in one sweep
 1765 ENDIF
 1766 
 1767 IFDEF BIG16AND32
 1768         jmp     filled
 1769 ENDIF
 1770 
 1771 IFDEF BIG32
 1772 fill_dwords:
 1773 .386
 1774         xor     eax, eax                ; the fill value
 1775         shr     cx, 2                   ; bytes -> dwords
 1776         rep     stosd                   ; zero the bflength bytes
 1777         stosw                           ; then the exponent word
 1778 ENDIF
 1779 
 1780 filled:
 1781 .8086
 1782         mov     ax, word ptr r          ; hand r back in ax
 1783         ret
 1784 
 1785 clear_bf   ENDP
 1786 
 1787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 1788 ; r = n
 1789 copy_bf   PROC USES di si, r:bf_t, n:bf_t
; Copies the bf number at n to r: bflength bytes followed by the 2 byte
; exponent, bflength+2 bytes in all.  Both offsets are taken relative to
; bignum_seg.  Returns the offset r in ax.
; ds is parked in ax while it points at bignum_seg and restored on exit.
; The string moves run upward only if the direction flag is clear, which
; this routine assumes and does not set itself.
 1790 
 1791         mov     ax, ds                  ; save ds for later
 1792         mov     cx, bflength
 1793         add     cx, 2                   ; cx = total bytes, exponent included
 1794         mov     di, word ptr r
 1795         mov     es, bignum_seg          ; load pointer in es:di
 1796         mov     si, word ptr n
 1797 
 1798 IFDEF BIG16AND32
 1799         cmp     cpu, 386                ; check cpu
 1800         jae     short use_32_bit        ; use faster 32 bit code if possible
 1801 ENDIF
 1802 
 1803 IFDEF BIG16
 1804         mov     ds, bignum_seg          ; load pointer in ds:si for movs
 1805 
 1806         shr     cx, 1                   ; 1 byte = 1/2 word
        ; cx started as bflength+2, so the exponent word is already part of
        ; the count.  Adding one more word here would move bflength+4 bytes
        ; and run 2 bytes past the end of both n and r.
 1808         rep     movsw                   ; copy word at a time
 1809 ENDIF
 1810 
 1811 IFDEF BIG16AND32
 1812         jmp     bottom
 1813 ENDIF
 1814 
 1815 IFDEF BIG32
 1816 use_32_bit:
 1817 .386
 1818         mov     ds, bignum_seg          ; load pointer in ds:si for movs
 1819 
        ; (bflength+2)/4 truncates to bflength/4 dwords provided bflength is
        ; a multiple of 4 (assumed here - TODO confirm), which leaves the
        ; exponent for the single movsw below.
 1820         shr     cx, 2                   ; 1 byte = 1/4 dword
 1821         rep     movsd                   ; copy dword at a time
 1822         movsw                           ; plus the exponent
 1823 ENDIF
 1824 
 1825 bottom:
 1826 .8086
 1827         mov     ds, ax                  ; restore ds
 1828         mov     ax, word ptr r          ; return r in ax
 1829         ret
 1830 
 1831 copy_bf   ENDP
 1832 
 1833 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 1834 ; LDBL bftofloat(bf_t n);
 1835 ; converts a bf number to a 10 byte real
 1836 ;
 1837 bftofloat   PROC USES di si, n:bf_t
 1838    LOCAL value[11]:BYTE   ; 11=10+1
; Builds an 80 bit real from the bf number at n and leaves it on the FPU
; stack (fld real10 at the end, or fldz for zero).
; Layout of value[]: value[0] is a guard byte used only for rounding,
; value[1..8] becomes the 64 bit mantissa, value[9..10] the sign/exponent
; word; the real10 that is loaded starts at value[1].
; Register roles: ax = caller's ds (ds is repointed at bignum_seg),
; cx = movebytes, bl = sign byte of n, dx = exponent as a power of 2.
 1839 
 1840       mov      ax, ds                  ; save ds
 1841 
 1842       mov      cx, 9                   ; need up to 9 bytes
 1843       cmp      bflength, 10            ; but no more than bflength-1
 1844       jae      movebytes_set
 1845       mov      cx, bflength            ; bflength is less than 10
 1846       dec      cx                      ; cx=movebytes=bflength-1, 1 byte padding
 1847 movebytes_set:
 1848 
 1849 IFDEF BIG16AND32
 1850       cmp     cpu, 386              ; check cpu
 1851 ;      jae     use_32_bit            ; use faster 32 bit code if possible
 1852       jb      over_use_32bit        ; inverted test + jmp, presumably because
 1853       jmp     use_32_bit            ; use_32_bit is out of short jump range
 1854 over_use_32bit:
 1855 ENDIF
 1856 
 1857 IFDEF BIG16
 1858 ; 16 bit code
 1859       ; clear value
 1860       mov      word ptr value[0], 0
 1861       mov      word ptr value[2], 0
 1862       mov      word ptr value[4], 0
 1863       mov      word ptr value[6], 0
 1864       mov      word ptr value[8], 0
 1865       mov      byte ptr value[10], 0
 1866 
 1867       ; copy bytes from n to value
 1868       lea      di, value+9
 1869       sub      di, cx               ; cx holds movebytes
 1870       mov      dx, ss               ; move ss to es for movs
 1871       mov      es, dx               ; ie: move ss:value+9-cx to es:di
 1872       mov      bx, bflength
 1873       dec      bx
 1874       sub      bx, cx               ; cx holds movebytes
 1875       mov      si, word ptr n
 1876       mov      ds, bignum_seg       ; move n to ds:si for movs
 1877       add      si, bx               ; n+bflength-1-movebytes
 1878       rep movsb                     ; fills value[9-movebytes..8]
 1879       mov      bl, ds:[si]          ; save sign byte, si now points to it
 1880       inc      si                   ; point to exponent
 1881       mov      dx, ds:[si]          ; use dx as exponent
 1882       mov      cl, 3                ; put exponent (dx) in base 2
 1883       shl      dx, cl               ; 256^n = 2^(8n)
 1884 
 1885       ; adjust for negative values
 1886       and      bl, 10000000b           ; isolate sign bit
 1887       jz       not_neg_16
 1888       neg      word ptr value[0]       ; take the negative of the 9 byte number
 1889       cmc                              ; toggle carry flag: CF=1 only if the word was 0
 1890       not      word ptr value[2]       ; not leaves CF alone, adc adds the carry in
 1891       adc      word ptr value[2], 0
 1892       not      word ptr value[4]
 1893       adc      word ptr value[4], 0
 1894       not      word ptr value[6]
 1895       adc      word ptr value[6], 0
 1896       not      byte ptr value[8]       ; notice this last one is byte ptr
 1897       adc      byte ptr value[8], 0
 1898 not_neg_16:
 1899 
 1900       cmp      byte ptr value[8], 0          ; test for 0 (top byte only; presumably
 1901       jnz      top_shift_16                  ; relies on n being normalized - confirm)
 1902       fldz
 1903       jmp      return
 1904 
 1905       ; Shift until most significant bit is set.
 1906 top_shift_16:
 1907       test     byte ptr value[8], 10000000b  ; test msb
 1908       jnz      bottom_shift_16
 1909       dec      dx                      ; decrement exponent
 1910       shl      word ptr value[0], 1    ; shift left the 9 byte number
 1911       rcl      word ptr value[2], 1
 1912       rcl      word ptr value[4], 1
 1913       rcl      word ptr value[6], 1
 1914       rcl      byte ptr value[8], 1    ; notice this last one is byte ptr
 1915       jmp      top_shift_16
 1916 bottom_shift_16:
 1917 
 1918       ; round last byte
 1919       cmp      byte ptr value[0], 80h  ; guard byte >= 1/2 ulp ?
 1920 ;      jb       bottom                  ; no rounding necessary
 1921       jnb      not_bottom1
 1922       jmp      bottom                  ; no rounding necessary
 1923 not_bottom1:
 1924       add      word ptr value[1], 1    ; round the 64 bit mantissa up by one
 1925       adc      word ptr value[3], 0
 1926       adc      word ptr value[5], 0
 1927       adc      word ptr value[7], 0
 1928 ;      jnc      bottom
 1929       jc       not_bottom2
 1930       jmp      bottom
 1931 not_bottom2:
 1932       ; to get to here, the pattern was rounded from +FFFF...
 1933       ; to +10000... with the 1 getting moved to the carry bit
 1934 ENDIF
 1935 
 1936 IFDEF BIG16AND32
 1937       jmp      rounded_past_end        ; hop over the 32 bit code
 1938 ENDIF
 1939 
 1940 IFDEF BIG32
 1941 use_32_bit:
 1942 .386
 1943       ; clear value
 1944       mov      dword ptr value[0], 0
 1945       mov      dword ptr value[4], 0
 1946       mov      word ptr value[8],  0
 1947       mov      byte ptr value[10], 0
 1948 
 1949       ; copy bytes from n to value
 1950       lea      di, value+9
 1951       sub      di, cx               ; cx holds movebytes
 1952       mov      dx, ss               ; move ss to es for movs
 1953       mov      es, dx               ; ie: move ss:value+9-cx to es:di
 1954       mov      bx, bflength
 1955       dec      bx
 1956       sub      bx, cx               ; cx holds movebytes
 1957       mov      si, word ptr n
 1958       mov      ds, bignum_seg       ; move n to ds:si for movs
 1959       add      si, bx               ; n+bflength-1-movebytes
 1960       rep movsb                     ; fills value[9-movebytes..8]
 1961       mov      bl, ds:[si]          ; save sign byte, si now points to it
 1962       inc      si                   ; point to exponent
 1963       mov      dx, ds:[si]          ; use dx as exponent
 1964       shl      dx, 3                ; 256^n = 2^(8n)
 1965 
 1966       ; adjust for negative values
 1967       and      bl, 10000000b           ; determine sign
 1968       jz       not_neg_32
 1969       neg      dword ptr value[0]      ; take the negative of the 9 byte number
 1970       cmc                              ; toggle carry flag: CF=1 only if the dword was 0
 1971       not      dword ptr value[4]      ; not leaves CF alone, adc adds the carry in
 1972       adc      dword ptr value[4], 0
 1973       not      byte ptr value[8]       ; notice this last one is byte ptr
 1974       adc      byte ptr value[8], 0
 1975 not_neg_32:
 1976 
 1977       cmp      byte ptr value[8], 0          ; test for 0 (top byte only; presumably
 1978       jnz      top_shift_32                  ; relies on n being normalized - confirm)
 1979       fldz
 1980       jmp      return
 1981 
 1982       ; Shift until most significant bit is set.
 1983 top_shift_32:
 1984       test     byte ptr value[8], 10000000b  ; test msb
 1985       jnz      bottom_shift_32
 1986       dec      dx                      ; decrement exponent
 1987       shl      dword ptr value[0], 1   ; shift left the 9 byte number
 1988       rcl      dword ptr value[4], 1
 1989       rcl      byte ptr value[8], 1    ; notice this last one is byte ptr
 1990       jmp      top_shift_32
 1991 bottom_shift_32:
 1992 
 1993       ; round last byte
 1994       cmp      byte ptr value[0], 80h  ; guard byte >= 1/2 ulp ?
 1995       jb       bottom                  ; no rounding necessary
 1996       add      dword ptr value[1], 1   ; round the 64 bit mantissa up by one
 1997       adc      dword ptr value[5], 0
 1998       jnc      bottom
 1999 
 2000       ; to get to here, the pattern was rounded from +FFFF...
 2001       ; to +10000... with the 1 getting moved to the carry bit
 2002 ENDIF
 2003 
 2004 rounded_past_end:
 2005 .8086 ; used in 16 bit code as well
 2006       mov      byte ptr value[8], 10000000b  ; mantissa is now 8000...0
 2007       inc      dx                      ; adjust the exponent
 2008 
 2009 bottom:
 2010       ; adjust exponent
 2011       add      dx, 3FFFh+7             ; unbiased -> biased, + adjusted
 2012       or       dh, bl                  ; set sign bit if set
 2013       mov      word ptr value[9], dx   ; sign/exponent word of the real10
 2014 
 2015       ; unlike float and double, long double is returned on fpu stack
 2016       fld      real10 ptr value[1]    ; load return value
 2017 return:
 2018       mov      ds, ax                  ; restore ds
 2019       ret
 2020 
 2021 bftofloat   endp
 2022 
 2023 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 2024 ; bf_t floattobf(bf_t n, LDBL f);
 2025 ; converts a 10 byte real to a bf number
 2026 ;
 2027 floattobf   PROC USES di si, n:bf_t, f:REAL10
 2028    LOCAL value[9]:BYTE   ; 9=8+1
 2029 ; I figured out a way to do this with no local variables,
 2030 ; but it's not worth the extra overhead.
; Stores the 80 bit real f into the bf number at n and returns n in ax.
; n is zeroed first with clear_bf and left that way when f[7] is 0.
; Layout of value[]: value[1..8] receives the 64 bit mantissa of f and
; value[0] is one byte of room below it for bits shifted out on the right.
; Register roles: cx = movebytes (bytes of mantissa that fit in n),
; dx = exponent as a power of 2, later divided by 8 to give a power of 256.

      invoke   clear_bf, n

      ; check to see if f is 0
      cmp      byte ptr f[7], 0        ; f[7] can only be 0 if f is 0
;      jz       return                  ; if f is 0, bailout now
      jnz      over_return
      jmp      return                  ; if f is 0, bailout now
over_return:

      mov      cx, 9                   ; need up to 9 bytes
      cmp      bflength, 10            ; but no more than bflength-1
      jae      movebytes_set
      mov      cx, bflength            ; bflength is less than 10
      dec      cx                      ; movebytes = bflength-1, 1 byte padding
movebytes_set:

IFDEF BIG16AND32
      cmp     cpu, 386              ; check cpu
      jae     use_32_bit            ; use faster 32 bit code if possible
ENDIF

IFDEF BIG16
; 16 bit code
      ; copy bytes from f's mantissa to value
 2031       mov      byte ptr value[0], 0    ; clear least sig byte
 2032       mov      ax, word ptr f[0]
 2033       mov      word ptr value[1], ax
 2034       mov      ax, word ptr f[2]
 2035       mov      word ptr value[3], ax
 2036       mov      ax, word ptr f[4]
 2037       mov      word ptr value[5], ax
 2038       mov      ax, word ptr f[6]
 2039       mov      word ptr value[7], ax
 2040 
 2041       ; get exponent in dx
 2042       mov      dx, word ptr f[8]       ; location of exponent
 2043       and      dx, 7FFFh               ; remove sign bit
 2044       sub      dx, 3FFFh+7             ; biased -> unbiased, + adjust
 2045 
 2046       ; Shift down until exponent is a mult of 8 (2^8n=256^n)
 2047 top_shift_16:
 2048       test     dx, 111b                ; expon mod 8
 2049       jz       bottom                  ; the only way out of this loop
 2050       inc      dx                      ; increment exponent
 2051       shr      word ptr value[7], 1    ; shift right the 9 byte number
 2052       rcr      word ptr value[5], 1    ; each rcr takes in the bit the
 2053       rcr      word ptr value[3], 1    ; previous shift pushed out
 2054       rcr      word ptr value[1], 1
 2055       rcr      byte ptr value[0], 1    ; notice this last one is byte ptr
 2056       jmp      top_shift_16
 2057 ENDIF
 2058 
 2059 IFDEF BIG32
 2060 use_32_bit:
 2061 .386
 2062       ; copy bytes from f's mantissa to value
      mov      byte ptr value[0], 0    ; clear least sig byte
      mov      eax, dword ptr f[0]
      mov      dword ptr value[1], eax
      mov      eax, dword ptr f[4]
      mov      dword ptr value[5], eax

      ; get exponent in dx
      mov      dx, word ptr f[8]       ; location of exponent
      and      dx, 7FFFh               ; remove sign bit
      sub      dx, 3FFFh+7             ; biased -> unbiased, + adjust

      ; Shift down until exponent is a mult of 8 (2^8n=256^n)
top_shift_32:
      test     dx, 111b                ; expon mod 8
      jz       bottom                  ; the only way out of this loop
      inc      dx                      ; increment exponent
      shr      dword ptr value[5], 1   ; shift right the 9 byte number
      rcr      dword ptr value[1], 1   ; takes in the bit shifted out above
      rcr      byte ptr value[0], 1    ; notice this last one is byte ptr
      jmp      top_shift_32
ENDIF

bottom:
.8086
      ; Don't bother rounding last byte as it would only make a difference
 2063       ; when bflength < 9, and then only on the last bit.
 2064 
 2065       ; move data into place, from value to n
 2066       lea      si, value+9
 2067       sub      si, cx               ; cx holds movebytes
 2068       mov      ax, ds               ; save ds
 2069       mov      bx, ss               ; copy ss to ds for movs
 2070       mov      ds, bx               ; ds:si
 2071       mov      di, word ptr n
 2072       mov      es, bignum_seg       ; move n to es:di for movs
 2073       add      di, bflength
 2074       dec      di
 2075       sub      di, cx               ; cx holds movebytes
 2076       rep movsb                     ; di ends at n+bflength-1
 2077       inc      di                   ; step over that byte to n+bflength
 2078       mov      cl, 3
 2079       sar      dx, cl               ; divide expon by 8, 256^n=2^8n
 2080       mov      word ptr es:[di], dx ; store exponent
 2081       mov      ds, ax               ; restore ds
 2082 
 2083       ; get sign
 2084       test     byte ptr f[9], 10000000b           ; test sign bit
 2085       jz       not_negative
 2086       invoke   neg_a_bf, n
 2087 not_negative:
 2088 return:
 2089       mov      ax, word ptr n
 2090       mov      dx, word ptr n+2        ; return r in dx:ax
                                      ; NOTE(review): this reads the stack word
                                      ; after n's low word; it is only a real
                                      ; pointer half if bf_t is wider than 16
                                      ; bits - confirm against big.inc
 2091       ret
 2092 floattobf   endp
 2093 
 2094 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 2095 ; LDBL bntofloat(bn_t n);
 2096 ; converts a bn number to a 10 byte real
 2097 ; (the most speed critical of these to/from float routines)
 2098 bntofloat   PROC USES di si, n:bn_t
 2099    LOCAL value[11]:BYTE   ; 11=10+1
; Builds an 80 bit real from the bn number at n and leaves it on the FPU
; stack (fld real10, or fldz when every byte of n is 0).
; The routine skips the leading 00 / FF fill bytes, copies the most
; significant remaining byte plus up to 8 bytes below it into value[],
; negates the copy when n is negative, shifts it left until bit 7 of
; value[8] is set and then fills in the sign/exponent word.
; Layout of value[]: value[1..8] is the 64 bit mantissa, value[9..10] the
; sign/exponent word, value[0] holds the 9th (lowest) copied byte.
; Register roles: bl = top byte of n (later just its sign bit),
; cx = byte counts, dx = exponent, ax = saved ds, then the special-case flag.
 2100 
 2101       ; determine the most significant byte, not 0 or FF
 2102       mov      si, word ptr n
 2103       mov      es, bignum_seg
 2104       dec      si
 2105       add      si, bnlength            ; n+bnlength-1
 2106       mov      bl, es:[si]             ; top byte
 2107       mov      cx, bnlength            ; initialize cx with full bnlength
 2108       cmp      bl, 0                   ; test top byte against 0
 2109       je       determine_sig_bytes
 2110       cmp      bl, 0FFh                ; test top byte against -1
 2111       jne      sig_bytes_determined
 2112 
 2113 determine_sig_bytes:
 2114       dec      cx                      ; now bnlength-1
 2115 top_sig_byte:
 2116       dec      si                      ; previous byte
 2117       cmp      es:[si], bl             ; does it have the right stuff?
 2118       jne      sig_bytes_determined    ; (ie: does it match top byte?)
 2119       loop     top_sig_byte            ; decrement cx and repeat
 2120 
 2121 ; At this point, it must be 0 with no sig figs at all
 2122 ; or -1/(256^bnlength), one bit away from being zero.
 2123       cmp      bl, 0                   ; was it zero?
 2124       jnz      not_zero                ; no, it was a very small negative
 2125                                        ; yes
 2126       fldz                             ; return zero
 2127       jmp      return
 2128 not_zero:
 2129       mov      ax, intlength
 2130       sub      ax, bnlength
 2131       mov      cl, 3
 2132       shl      ax, cl                  ; 256^n=2^8n, now more like movebits
 2133       add      ax, 3FFFh+0             ; bias, no adjustment necessary
 2134       or       ah, 10000000b           ; turn on sign flag
 2135       mov      word ptr value[9], ax   ; store exponent
 2136       mov      word ptr value[7], 8000h ; store mantissa of 1 in most sig bit
 2137       ; clear rest of value that is actually used
 2138       mov      word ptr value[1], 0
 2139       mov      word ptr value[3], 0
 2140       mov      word ptr value[5], 0
 2141 
 2142       fld      real10 ptr value[1]
 2143       jmp      return
 2144 
 2145 sig_bytes_determined:
      ; Here si = n+cx-1, so cx counts the most significant byte itself
      ; and only cx-1 bytes of n lie below it.
 2146       mov      dx, cx               ; save in dx for later
      ; Clamp on the bytes that really exist below si.  Clamping on cx
      ; itself moved si to n-1 whenever cx <= 8 and pulled a byte from
      ; before the number into the mantissa.
      dec      cx                   ; bytes available below si
 2147       cmp      cx, 9-1              ; want at most 8 of them
 2148       jb       set_movebytes
 2149       mov      cx, 9-1              ; up to 8 bytes
 2150 set_movebytes:                      ; cx now holds movebytes
 2151                                     ; si still points to most non-0 sig byte
 2152       sub      si, cx               ; si now points to first byte to be moved
 2153       inc      cx                   ; can be up to 9
 2154 
 2155 IFDEF BIG16AND32
 2156       cmp     cpu, 386              ; check cpu
 2157 ;      jae     use_32_bit            ; use faster 32 bit code if possible
 2158       jb      not_use_32_bit
 2159       jmp     use_32_bit            ; use faster 32 bit code if possible
 2160 not_use_32_bit:
 2161 ENDIF
 2162 
 2163 IFDEF BIG16
 2164 ; 16 bit code
 2165       ; clear value
 2166       mov      word ptr value[0], 0
 2167       mov      word ptr value[2], 0
 2168       mov      word ptr value[4], 0
 2169       mov      word ptr value[6], 0
 2170       mov      word ptr value[8], 0
 2171       mov      byte ptr value[10], 0
 2172 
 2173       ; copy bytes from n to value  ; es:si still holds first move byte of n
 2174       lea      di, value+9
 2175       sub      di, cx               ; cx holds movebytes
 2176       mov      ax, ss               ; move ss to es
 2177       mov      es, ax               ; value[9] is in es:di
 2178       mov      ax, ds               ; save ds
 2179       mov      ds, bignum_seg       ; first move byte of n is now in ds:si
 2180       rep movsb                     ; fills value[9-movebytes..8]
 2181       mov      ds, ax               ; restore ds
 2182 
 2183       ; adjust for negative values
 2184       xor      ax, ax                  ; use ax as a flag
 2185       ; get sign flag                  ; top byte is still in bl
 2186       and      bl, 10000000b           ; isolate the sign bit
 2187       jz       not_neg_16
 2188       neg      word ptr value[0]       ; take the negative of the 9 byte number
 2189       cmc                              ; toggle carry flag: CF=1 only if the word was 0
 2190       not      word ptr value[2]       ; not leaves CF alone, adc adds the carry in
 2191       adc      word ptr value[2], 0
 2192       not      word ptr value[4]
 2193       adc      word ptr value[4], 0
 2194       not      word ptr value[6]
 2195       adc      word ptr value[6], 0
 2196       not      byte ptr value[8]       ; notice this last one is byte ptr
 2197       adc      byte ptr value[8], 0
 2198       jnc      not_neg_16              ; normal
 2199       mov      byte ptr value[8], 10000000b    ;n was FFFF...0000...
 2200       inc      ax                      ; set ax to 1 to flag this special case
 2201 
 2202 not_neg_16:
 2203       sub      dx, bnlength            ; adjust exponent
 2204       add      dx, intlength           ; adjust exponent
 2205       mov      cl, 3
 2206       shl      dx, cl                  ; 256^n=2^8n
 2207       add      dx, ax                  ; see special case above
 2208       ; Shift until most significant bit is set.
 2209 top_shift_16:
 2210       test     byte ptr value[8], 10000000b  ; test msb
 2211 ;      jnz      bottom
 2212       jz       over_bottom
 2213       jmp      bottom
 2214 over_bottom:
 2215       dec      dx                      ; decrement exponent
 2216       shl      word ptr value[0], 1    ; shift left the 9 byte number
 2217       rcl      word ptr value[2], 1
 2218       rcl      word ptr value[4], 1
 2219       rcl      word ptr value[6], 1
 2220       rcl      byte ptr value[8], 1    ; notice this last one is byte ptr
 2221       jmp      top_shift_16
 2222 
 2223 ; don't bother rounding, not really needed while speed is.
ENDIF

IFDEF BIG32
use_32_bit:
.386
      ; clear value
      mov      dword ptr value[0], 0
      mov      dword ptr value[4], 0
      mov      word ptr value[8],  0
      mov      byte ptr value[10], 0

      ; copy bytes from n to value  ; es:si still holds first move byte of n
      lea      di, value+9
      sub      di, cx               ; cx holds movebytes
      mov      ax, ss               ; move ss to es
      mov      es, ax               ; value[9] is in es:di
      mov      ax, ds               ; save ds
      mov      ds, bignum_seg       ; first move byte of n is now in ds:si
      rep movsb                     ; fills value[9-movebytes..8]
      mov      ds, ax               ; restore ds

      ; adjust for negative values
      xor      ax, ax                  ; use ax as a flag
      ; get sign flag                  ; top byte is still in bl
      and      bl, 10000000b           ; determine sign
      jz       not_neg_32
      neg      dword ptr value[0]      ; take the negative of the 9 byte number
      cmc                              ; toggle carry flag: CF=1 only if the dword was 0
      not      dword ptr value[4]      ; not leaves CF alone, adc adds the carry in
      adc      dword ptr value[4], 0
      not      byte ptr value[8]       ; notice this last one is byte ptr
      adc      byte ptr value[8], 0
      jnc      not_neg_32              ; normal
      mov      byte ptr value[8], 10000000b    ;n was FFFF...0000...
      inc      ax                      ; set ax to 1 to flag this special case

not_neg_32:
      sub      dx, bnlength            ; adjust exponent
      add      dx, intlength           ; adjust exponent
      shl      dx, 3                   ; 256^n=2^8n
      add      dx, ax                  ; see special case above
      ; Shift until most significant bit is set.
top_shift_32:
      test     byte ptr value[8], 10000000b  ; test msb
      jnz      bottom
      dec      dx                      ; decrement exponent
      shl      dword ptr value[0], 1   ; shift left the 9 byte number
      rcl      dword ptr value[4], 1
      rcl      byte ptr value[8], 1    ; notice this last one is byte ptr
      jmp      top_shift_32

; don't bother rounding, not really needed while speed is.
 2224 ENDIF
 2225 
 2226 bottom:
 2227 .8086
 2228       ; adjust exponent
 2229       add      dx, 3FFFh+7-8           ; unbiased -> biased, + adjusted
 2230       or       dh, bl                  ; set sign bit if set
 2231       mov      word ptr value[9], dx   ; sign/exponent word of the real10
 2232 
 2233       ; unlike float and double, long double is returned on fpu stack
 2234       fld      real10 ptr value[1]    ; load return value
 2235 return:
 2236       ret
 2237 
 2238 bntofloat   endp
 2239 
 2240 ;
 2241 ; LDBL floattobn(bf_t n, LDBL f) is in BIGNUM.C
 2242 ;
 2243 
 2244 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 2245 ; These last two functions do not use bignum type numbers, but take
 2246 ; long doubles as arguments.  These routines are called by the C code.
 2247 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 2248 
 2249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 2250 ; LDBL extract_256(LDBL f, int *exp_ptr)
 2251 ;
 2252 ; extracts the mantissa and exponent of f
 2253 ; finds m and n such that 1<=|m|<256 and f = m*256^n
 2254 ; n is stored in *exp_ptr and m is returned, sort of like frexp()
 2255 
 2256 extract_256   PROC f:real10, exp_ptr: ptr sword
 2257 local  expon:sword, exf:real10, tmp_word:word
 2258 
 2259         fld     f               ; f
 2260         ftst                    ; test for zero
 2261         fstsw   tmp_word
 2262         fwait
 2263         mov     ax,tmp_word
 2264         sahf
 2265         jnz     not_zero        ; proceed
 2266 
 2267         mov     bx, exp_ptr
 2268         mov     word ptr [bx], 0    ; save = in *exp_ptr
 2269         jmp     bottom          ; f, which is zero, is already on stack
 2270 
 2271 not_zero:
 2272 
 2273 ; since a key fpu operation, fxtract, is not emulated by the MS floating
 2274 ; point library, separate code is included under use_emul:
 2275         cmp     fpu, 0
 2276         je      use_emul
 2277 
 2278                                 ; f is already on stack
 2279         fxtract                 ; mant exp, where f=mant*2^exp
 2280         fxch                    ; exp mant
 2281         fistp   expon           ; mant
 2282         fwait
 2283         mov     ax, expon
 2284         mov     dx, ax          ; make copy for later use
 2285 
 2286         cmp     ax, 0           ;
 2287         jge     pos_exp         ; jump if exp >= 0
 2288 
 2289                                 ; exp is neg, adjust exp
 2290         add     ax, 8           ; exp+8
 2291 
 2292 pos_exp:
 2293 ; adjust mantissa
 2294         and     ax, 7           ; ax mod 8
 2295         jz      adjust_exponent ; don't bother with zero adjustments
        mov     expon, ax       ; use expon as a temp var
        fild    expon           ; exp mant

        fxch                    ; mant exp
        fscale                  ; mant*2^exp exp
        fstp    st(1)           ; mant*2^exp (store in 1 and pop)

adjust_exponent:
        mov     cl, 3
        sar     dx, cl          ; exp / 8
        mov     bx, exp_ptr
        mov     [bx], dx        ; save in *exp_ptr

        fwait
        jmp     bottom


use_emul:
; emulate above code by direct manipulation of 80 bit floating point format
                                    ; f is already on stack
        fstp    exf

        mov     ax, word ptr exf+8  ; get word with the exponent in it
        mov     dx, ax              ; make copy for later use

        and     dx, 8000h           ; keep just the sign bit
        or      dx, 3FFFh           ; 1<=f<2

        and     ax, 7FFFh           ; throw away the sign bit
        sub     ax, 3FFFh           ; unbiased -> biased
        mov     bx, ax
        cmp     bx, 0
        jge     pos_exp_emul
        add     bx, 8               ; adjust negative exponent
pos_exp_emul:
        and     bx, 7               ; bx mod 8
        add     dx, bx
        mov     word ptr exf+8, dx  ; put back word with the exponent in it

        mov     cl, 3
        sar     ax, cl              ; div by 8,  2^(8n) = 256^n
        mov     bx, exp_ptr
        mov     [bx], ax            ; save in *exp_ptr

        fld     exf                 ; for return value

bottom:
        ; unlike float and double, long double is returned on fpu stack
        ret
extract_256   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; LDBL scale_256( LDBL f, int n );
; calculates and returns the value of f*256^n
; sort of like ldexp()
;
; n must be in the range -2^12 <= n < 2^12 (2^12=4096),
; which should not be a problem

scale_256   PROC f:real10, n: sword
;
; Returns f * 256^n in ST(0), computed as f * 2^(8n) with fscale.
; The stack copy of n is overwritten with 8n along the way.
; Uses ax, cl and the flags as scratch.
; Stack comments below list the fpu stack as ST0 [ST1].

        mov     ax, n
        test    ax, ax
        jz      load_only               ; 256^0 = 1, nothing to scale

        mov     cl, 3
        shl     ax, cl                  ; 8n
        mov     n, ax                   ; fild needs a memory operand
        fild    n                       ; ST0 = 8n
        fld     f                       ; ST0 = f, ST1 = 8n
; the fscale range limits for 8087/287 processors won't be a problem here
        fscale                          ; ST0 = f*2^(8n) = f*256^n, ST1 = 8n
        fstp    st(1)                   ; drop 8n, ST0 = result
        jmp     bottom

load_only:
        fld     f                       ; ST0 = f, returned unchanged

bottom:
        ; unlike float and double, long double is returned on fpu stack
        ret
scale_256   ENDP

END