C:\Data\fractdev\fractint\fractint\dos\calcmand.asm

File: dos\calcmand.asm

    1 ;       CALCMAND.ASM - Mandelbrot/Julia Set calculation Routines
    2 
    3 ;       This module runs as part of an overlay with calcfrac.c.
    4 ;       It must not be called from anywhere other than calcfrac.
    5 
    6 ;       The routines in this code perform Mandelbrot and Julia set
    7 ;       calculations using 32-bit integer math as opposed to the
    8 ;       "traditional" floating-point approach.
    9 
   10 ;       This code relies on several tricks to run as quickly as it does.
   11 
   12 ;       One can fake floating point arithmetic by using integer
   13 ;       arithmetic and keeping track of the implied decimal point
   14 ;       if things are reasonable -- and in this case, they are.
   15 ;       I replaced code that looked like: z = x*y with code that
   16 ;       looks like:
   17 ;                       ix = x * ifudge                 (outside the loops)
   18 ;                       iy = y * ifudge
   19 ;                       ....
   20 ;                       iz = (ix * iy) / ifudge         (inside the loops)
   21 ;       (and keep remembering that all the integers are "ifudged" bigger)
   22 
   23 ;       The 386 has native 32-bit integer arithmetic, and (briefly) keeps
   24 ;       64-bit values around after 32-bit multiplies.   If the result is
   25 ;       divided down right away, you've got 64-bit arithmetic.   You just
;       have to ensure that the result after the divide is <= 32 bits long.
;       CPUs predating the 386 have to emulate 32-bit arithmetic using
;       16-bit arithmetic, which is significantly slower.

;       Dividing is slow -- but shifting is fast, and we can select our
;       "fudge factor" to be a power of two, permitting us to use that
;       method instead.   In addition, the 386 can perform 32-bit wide
;       shifting -- and even 64-bit shifts with the following logic:
;                       shdr    eax,edx,cl
;                       shr     edx,cl
;       so we make sure that our "fudge factor" is a power of 2 and shift
;       it down that way.
;       Calcmand is hardcoded for a fudge factor of 2**29.


;                                       Bert Tyler
; History since Fractint 16.0
;  (See comments with CJLT in them)
;  CJLT=Chris Lusby Taylor who has...
;
;   1. Speeded up 16 bit on 16 bit CPU
;       Minor changes, notably prescaling to fg14 before multiplying
;       instead of scaling the answer.
;       Also, I added overflow detection after adding linit, since it
;       seems this could overflow.
;       Overall effect is about 10 97538516110730176000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.000000aster on 386 with debugflag=8088
;   2. Speeded up 32 bit on 16 bit CPU
;       The macro `square' is totally rewritten, as is the logic for 2xy,
   26 ;       by prescaling x and y to fg31, not fg29. This allows us to do a
   27 ;       32 bit multiply in 3, not 4, 16 bit chunks while retaining full
   28 ;       fg29 accuracy.
   29 ;       Also, I removed lots of well-meaning but ineffective code handling
   30 ;       special cases of zeros and tidied up the handling of negative numbers,
   31 ;       so the routine is quite a bit shorter now and overall throughput of
   32 ;       Mandel is now over 40 faster on a 386 with debugflag=8088.
   33 ;       By the way, I was tempted to go the whole hog and replace x*x-y*y
   34 ;       by (x+y)*(x-y) to reduce 4 16-bit multiplys to 3, but it makes
   35 ;       escape detection a bit trickier. Another time, maybe.
   36 ;
   37 ;   3. Made maxit a dword variable. 1/18/94
   38 
   39 ;                        required for compatibility if Turbo ASM
   40 IFDEF ??version
   41 MASM51
   42 QUIRKS
   43 ENDIF
   44 
   45 .MODEL  medium,c
   46 DGROUP        group   _DATA,_DATA2
   47 
   48 .8086
   49 
   50         ; these must NOT be in any segment!!
   51         ; this get's rid of TURBO-C fixup errors

        extrn   keypressed:far          ; this routine is in 'general.asm'
        extrn   getakey:far             ; this routine is in 'general.asm'
        extrn   iplot_orbit:far         ; this routine is in 'calcfrac.c'
        extrn   scrub_orbit:far         ; this routine is in 'calcfrac.c'

_DATA2          segment DWORD PUBLIC 'DATA'

FUDGEFACTOR     equ     29              ; default (non-potential) fudgefactor
KEYPRESSDELAY   equ     32767   ; 7FFFh

; ************************ External variables *****************************

        extrn   fractype:word           ; == 0 if Mandelbrot set, else Julia
        extrn   inside:word             ; "inside" color, normally 1 (blue)
        extrn   outside:word            ; "outside" color, normally -1 (iter)
        extrn   creal:dword, cimag:dword ; Julia Set Constant
        extrn   delmin:dword            ; min increment - precision required
        extrn   maxit:dword             ; maximum iterations
        extrn   lm:dword                ; magnitude bailout limit
        extrn   coloriter:dword         ; iterations calculated for the pixel
        extrn   realcoloriter:dword

        extrn   row:word, col:word      ; current pixel to calc

        extrn   reset_periodicity:word  ; nonzero if to be reset
        extrn   kbdcount:word           ; keyboard counter

        extrn   cpu:word                ; cpu type: 86, 186, 286, or 386
        extrn   dotmode:word

        extrn   show_orbit:word         ; "show-orbit" flag
        extrn   orbit_ptr:word          ; "orbit pointer" flag
        extrn   periodicitycheck:word   ; no periodicity if zero
        extrn   lclosenuff:dword

        public  linitx,linity           ; caller sets these

        extrn   nextsavedincr:word              ; for incrementing AND value
        extrn   firstsavedand:dword             ; AND value
        extrn   showdot:word
        extrn   orbit_delay:word

; ************************ Internal variables *****************************

                align   4
x               dd      0               ; temp value: x
y               dd      0               ; temp value: y
;absx           dd      0               ; temp value: abs(x)
linitx          dd      0               ; initial value, set by calcfrac
linity          dd      0               ; initial value, set by calcfrac
savedx          dd      0               ; saved values of X and Y iterations
savedy          dd      0               ;  (for periodicity checks)
k               dd      0               ; iteration countdown counter
oldcoloriter    dd      0               ; prior pixel's escape time k value
   52 savedand        dd      0               ; AND value for periodicity checks
   53 savedincr       dw      0               ; flag for incrementing AND value
   54 period          db      0               ; periodicity, if in the lake
   55 
   56 _DATA2          ends
   57 
   58 .CODE
   59 
   60 ; ***************** Function calcmandasm() **********************************
   61 
   62         public  calcmandasm
   63 
   64 FRAME   MACRO regs
   65         push    bp
   66         mov     bp, sp
   67         IRP     reg, 
   68           push  reg
   69           ENDM
   70         ENDM
   71 
   72 UNFRAME MACRO regs
   73         IRP     reg, 
   74           pop reg
   75           ENDM
   76         pop bp
   77         ENDM
   78 
   79 calcmandasm     proc
   80         FRAME                    ; std frame, for TC++ overlays
   81         sub     ax,ax                   ; clear ax
   82         mov     dx,ax                   ; clear dx
   83         cmp     periodicitycheck,ax     ; periodicity checking disabled?
   84         je      initoldcolor            ;  yup, set oldcolor 0 to disable it
   85         cmp     reset_periodicity,ax    ; periodicity reset?
   86         je      short initparms         ; inherit oldcolor from prior invocation
   87         mov     ax,word ptr maxit               ; yup.  reset oldcolor to maxit-250
   88         mov     dx,word ptr maxit+2
   89         sub     ax,250                  ; (avoids slowness at high maxits)
   90         sbb     dx,0
   91 
   92 initoldcolor:
   93         mov     word ptr oldcoloriter,ax                ; reset oldcoloriter
   94         mov     word ptr oldcoloriter+2,dx              ; reset oldcoloriter
   95 
   96 initparms:
   97         mov     ax,word ptr creal       ; initialize x == creal
   98         mov     dx,word ptr creal+2     ;  ...
   99         mov     word ptr x,ax           ;  ...
  100         mov     word ptr x+2,dx         ;  ...
  101 
  102         mov     ax,word ptr cimag       ; initialize y == cimag
  103         mov     dx,word ptr cimag+2     ;  ...
  104         mov     word ptr y,ax           ;  ...
  105         mov     word ptr y+2,dx         ;  ...
  106 
  107         mov     ax,word ptr maxit               ; setup k = maxit
  108         mov     dx,word ptr maxit+2
  109         add     ax,1                    ; (+ 1)
  110         adc     dx,0
  111         mov     word ptr k,ax                   ;  (decrementing to 0 is faster)
  112         mov     word ptr k+2,dx
  113 
  114         cmp     fractype,1              ; julia or mandelbrot set?
  115         je      short dojulia           ; julia set - go there
  116 
  117 ;       (Tim wants this code changed so that, for the Mandelbrot,
  118 ;       Z(1) = (x + iy) + (a + ib).  Affects only "fudged" Mandelbrots.
  119 ;       (for the "normal" case, a = b = 0, and this works, too)
  120 ;       cmp     word ptr x,0            ; Mandelbrot shortcut:
  121 ;       jne     short doeither          ;  if creal = cimag = 0,
  122 ;       cmp     word ptr x+2,0          ; the first iteration can be emulated.
  123 ;       jne     short doeither          ;  ...
  124 ;       cmp     word ptr y,0            ;  ...
  125 ;       jne     short doeither          ;  ...
  126 ;       cmp     word ptr y+2,0          ;  ...
  127 ;       jne     short doeither          ;  ...
  128 ;       dec     k                       ; we know the first iteration passed
  129 ;       mov     dx,word ptr linitx+2    ; copy x = linitx
  130 ;       mov     ax,word ptr linitx      ;  ...
  131 ;       mov     word ptr x+2,dx         ;  ...
  132 ;       mov     word ptr x,ax           ;  ...
  133 ;       mov     dx,word ptr linity+2    ; copy y = linity
  134 ;       mov     ax,word ptr linity      ;  ...
  135 ;       mov     word ptr y+2,dx         ;  ...
  136 ;       mov     word ptr y,ax           ;  ...
  137 
  138         sub     word ptr k,1            ; we know the first iteration passed
  139         sbb     word ptr k+2,0          ; we know the first iteration passed
  140         mov     dx,word ptr linitx+2    ; add x += linitx
  141         mov     ax,word ptr linitx      ;  ...
  142         add     word ptr x,ax           ;  ...
  143         adc     word ptr x+2,dx         ;  ...
  144         mov     dx,word ptr linity+2    ; add y += linity
  145         mov     ax,word ptr linity      ;  ...
  146         add     word ptr y,ax           ;  ...
  147         adc     word ptr y+2,dx         ;  ...
  148         jmp     short doeither          ; branch around the julia switch
  149 
  150 dojulia:                                ; Julia Set initialization
  151                                         ; "fudge" Mandelbrot start-up values
  152         mov     ax,word ptr x           ; switch x with linitx
  153         mov     dx,word ptr x+2         ;  ...
  154         mov     bx,word ptr linitx      ;  ...
  155         mov     cx,word ptr linitx+2    ;  ...
  156         mov     word ptr x,bx           ;  ...
  157         mov     word ptr x+2,cx         ;  ...
  158         mov     word ptr linitx,ax      ;  ...
  159         mov     word ptr linitx+2,dx    ;  ...
  160 
  161         mov     ax,word ptr y           ; switch y with linity
  162         mov     dx,word ptr y+2         ;  ...
  163         mov     bx,word ptr linity      ;  ...
  164         mov     cx,word ptr linity+2    ;  ...
  165         mov     word ptr y,bx           ;  ...
  166         mov     word ptr y+2,cx         ;  ...
  167         mov     word ptr linity,ax      ;  ...
  168         mov     word ptr linity+2,dx    ;  ...
  169 
  170 doeither:                               ; common Mandelbrot, Julia set code
  171         mov     period,0                ; claim periodicity of 1
  172         mov     ax,word ptr firstsavedand       ; initial periodicity check
  173         mov     word ptr savedand,ax    ; initial periodicity check
  174         mov     ax,word ptr firstsavedand+2     ; initial periodicity check
  175         mov     word ptr savedand+2,ax  ; initial periodicity check
  176         mov     savedincr,1             ;  flag for incrementing periodicity
  177         mov     word ptr savedx+2,0ffffh; impossible value of "old" x
  178         mov     word ptr savedy+2,0ffffh; impossible value of "old" y
  179         mov     orbit_ptr,0             ; clear orbits
  180 
  181         dec     kbdcount                ; decrement the keyboard counter
  182         jns     nokey                   ;  skip keyboard test if still positive
  183         mov     kbdcount,10             ; stuff in a low kbd count
  184         cmp     show_orbit,0            ; are we showing orbits?
  185         jne     quickkbd                ;  yup.  leave it that way.
  186         cmp     orbit_delay,0           ; are we delaying orbits?
  187         je      slowkbd                 ;  nope.  change it.
  188         cmp     showdot,0               ; are we showing the current pixel?
  189         jge     quickkbd                ;  yup.  leave it that way.
  190 slowkbd:
  191         mov     kbdcount,5000   ; else, stuff an appropriate count val
  192         cmp     cpu,386                 ; ("appropriate" to the CPU)
  193         jae     short kbddiskadj        ;  ...
  194 ;;      cmp     word ptr delmin+2,1     ; is 16-bit math good enough?
  195         cmp     word ptr delmin+2,8     ; is 16-bit math good enough?
  196         ja      kbddiskadj              ;  yes. test less often
  197         mov     kbdcount,500    ;  no.  test more often
  198 kbddiskadj:
  199         cmp     dotmode,11              ; disk video?
  200         jne     quickkbd                ;  no, leave as is
  201         shr     kbdcount,1      ; yes, reduce count
  202         shr     kbdcount,1      ; yes, reduce count
  203 quickkbd:
  204         call    far ptr keypressed      ; has a key been pressed?
  205         cmp     ax,0                    ;  ...
  206         je      nokey                   ; nope.  proceed
  207         mov     kbdcount,0              ; make sure it goes negative again
  208         cmp     ax,'o'                  ; orbit toggle hit?
  209         je      orbitkey                ;  yup.  show orbits
  210         cmp     ax,'O'                  ; orbit toggle hit?
  211         jne     keyhit                  ;  nope.  normal key.
  212 orbitkey:
  213         call    far ptr getakey         ; read the key for real
  214         mov     ax,1                    ; reset orbittoggle = 1 - orbittoggle
  215         sub     ax,show_orbit           ;  ...
  216         mov     show_orbit,ax           ;  ...
  217         jmp     short nokey             ; pretend no key was hit
  218 keyhit: mov     ax,-1                   ; return with -1
  219         mov     dx,ax
  220         mov     word ptr coloriter,ax   ; set coloriter to -1
  221         mov     word ptr coloriter+2,dx
  222         UNFRAME                  ; pop stack frame
  223         ret                             ; bail out!
  224 
  225 nokey:
  226         cmp     show_orbit,0            ; is orbiting on?
  227         jne     no16bitcode             ;  yup.  slow down.
  228         cmp     cpu,386                 ; are we on a 386?
  229         jae     short code386bit        ;  YAY!! 386-class speed!
  230 ;;      cmp     word ptr delmin+2,1     ; OK, we're desperate.  16 bits OK?
        cmp     word ptr delmin+2,8     ; OK, we're desperate.  16 bits OK?
  231         ja      yes16bitcode            ;  YAY!  16-bit speed!
  232 no16bitcode:
  233         call    near ptr code32bit      ; BOO!! nap time.  Full 32 bit math
  234         cmp     cx,-1
  235         je      keyhit          ; key stroke, get us out of here
  236         jmp     kloopend                ;  bypass the 386-specific code.
  237 yes16bitcode:
  238         call    near ptr code16bit      ; invoke the 16-bit version
  239         cmp     cx,-1
  240         je      keyhit          ; key stroke, get us out of here
  241         jmp     kloopend                ;  bypass the 386-specific code.
  242 
  243 .386                                    ; 386-specific code starts here
  244 
  245 code386bit:
  246 ;;      cmp     word ptr delmin+2,3     ; is 16-bit math good enough?
  247         cmp     word ptr delmin+2,8     ; is 16-bit math good enough?
  248         jbe     code386_32              ; nope, go do 32 bit stuff
  249 IFDEF ??version
  250         jmp     code386_32              ; TASM screws up IMUL EBX,EBX!!
  251 ENDIF
  252 
  253         ; 16 bit on 386, now we are really gonna move
  254         movsx   esi,word ptr x+2        ; use SI for X
  255         movsx   edi,word ptr y+2        ; use DI for Y
  256         push    ebp
  257         mov     ebp,-1
  258         shl     ebp,FUDGEFACTOR-1
  259         mov     cx,FUDGEFACTOR-16
  260 
  261 kloop386_16:   ; cx=bitshift-16, ebp=overflow.mask
  262 
  263         mov     ebx,esi                 ; compute (x * x)
  264         imul    ebx,ebx                 ;  ...
  265         test    ebx,ebp                 ;
  266         jnz     short end386_16         ;  (oops.  We done.)
  267         shr     ebx,cl                  ; get result down to 16 bits
  268 
  269         mov     edx,edi                 ; compute (y * y)
  270         imul    edx,edx                 ;  ...
  271         test    edx,ebp                 ; say, did we overflow? 
  272         jnz     short end386_16         ;  (oops.  We done.)
  273         shr     edx,cl                  ; get result down to 16 bits
  274 
  275         mov     ax,bx                   ; compute (x*x - y*y) / fudge
  276         sub     bx,dx                   ;  for the next iteration
  277 
  278         add     ax,dx                   ; compute (x*x + y*y) / fudge
  279 
  280         cmp     ax,word ptr lm+2        ; while (xx+yy < lm)
  281         jae     short end386_16         ;  ...
  282 
  283         imul    edi,esi                 ; compute (y * x)
  284         shl     edi,1                   ; ( * 2 / fudge)
  285         sar     edi,cl
  286         add     di,word ptr linity+2    ; (2*y*x) / fudge + linity
  287         movsx   edi,di                  ; save as y
  288 
  289         add     bx,word ptr linitx+2    ; (from above) (x*x - y*y)/fudge + linitx
  290         movsx   esi,bx                  ; save as x
  291 
  292 ;       mov     eax,oldcoloriter                ; recall the old color
  293 ;       cmp     eax,k                   ; check it against this iter
  294 ;       jge     short chkpd386_16       ;  yup.  do periodicity check.
  295         mov     eax,k                   ; rearranged for speed
  296         cmp     eax,oldcoloriter
  297         jb      short chkpd386_16
  298 nonmax386_16:
  299 ; miraculously, k is always loaded into eax at this point
  300 ;       mov     eax,k   ; set up to test for key stroke
  301         test    eax,KEYPRESSDELAY
  302         jne     notakey1                ; don't test yet
        push    cx
        call    far ptr keypressed      ; has a key been pressed?
        pop     cx
        cmp     ax,0                    ;  ...
        je      notakey1                        ; nope.  proceed
        pop     ebp
        jmp     keyhit
notakey1:

        dec     k                       ; while (k < maxit)
        jnz     short kloop386_16       ; try, try again
end386_16:
        pop     ebp
        jmp     kloopend32              ; we done

chkpd386_16:
;       mov     eax,k   ; set up to test for save-time  ;; already loaded
        test    eax,savedand            ; save on 0, check on anything else
        jz      short chksv386_16       ;  time to save a new "old" value
        mov     bx,si                   ; load up x
        xor     bx,word ptr savedx+2    ; does X match?
        cmp     bx,word ptr lclosenuff+2 ;  truncate to appropriate precision
        ja      short nonmax386_16      ;  nope.  forget it.
        mov     bx,di                   ; now test y
        xor     bx,word ptr savedy+2    ; does Y match?
        cmp     bx,word ptr lclosenuff+2 ;  truncate to appropriate precision
        ja      short nonmax386_16      ;  nope.  forget it.
        mov     period,1                ; note that we have found periodicity
        mov     k,0     ; pretend maxit reached
        jmp     short end386_16
chksv386_16:
        mov     word ptr savedx+2,si    ; save x
        mov     word ptr savedy+2,di    ; save y
        dec     savedincr               ; time to change the periodicity?
        jnz     short nonmax386_16      ;  nope.
        shl     savedand,1              ; well then, let's try this one!
  303         inc     savedand                ;  (2**n +1)
  304         mov     ax,nextsavedincr        ; and reset the increment flag
  305         mov     savedincr,ax    ; and reset the increment flag
  306         jmp     short nonmax386_16
  307 
  308         ; 32bit on 386:
  309 code386_32:
  310         mov     esi,x                   ; use ESI for X
  311         mov     edi,y                   ; use EDI for Y
  312 
  313 ;       This is the main processing loop.  Here, every T-state counts...
  314 
  315 kloop:                                  ; for (k = 0; k <= maxit; k++)
  316 
  317         mov     eax,esi                 ; compute (x * x)
  318         imul    esi                     ;  ...
  319         shrd    eax,edx,FUDGEFACTOR     ; ( / fudge)
  320         shr     edx,FUDGEFACTOR-1       ; (complete 64-bit shift and check
  321         jne     short kloopend1         ; bail out if too high
  322         mov     ebx,eax                 ; save this for below
  323 
  324         mov     eax,edi                 ; compute (y * y)
  325         imul    edi                     ;  ...
  326         shrd    eax,edx,FUDGEFACTOR     ; ( / fudge)
  327         shr     edx,FUDGEFACTOR-1       ; (complete 64-bit shift and check
  328         jne     short kloopend1         ; bail out if too high
  329 
  330         mov     ecx,ebx                 ; compute (x*x - y*y) / fudge
  331         sub     ebx,eax                 ;  for the next iteration
  332 
  333         add     ecx,eax                 ; compute (x*x + y*y) / fudge
  334         cmp     ecx,lm                  ; while (lr < lm)
  335         jae     short kloopend1         ;  ...
  336 
  337         mov     eax,edi                 ; compute (y * x)
  338         imul    esi                     ;  ...
  339         shrd    eax,edx,FUDGEFACTOR-1   ;  ( * 2 / fudge)
  340         add     eax,linity              ;  (above) + linity
  341         mov     edi,eax                 ;  save this as y
  342 
  343 ;       (from the earlier code)         ; compute (x*x - y*y) / fudge
  344         add     ebx,linitx              ;       + linitx
  345         mov     esi,ebx                 ; save this as x
  346 
  347 ;       mov     eax,oldcoloriter                ; recall the old coloriter
  348 ;       cmp     eax,k                   ; check it against this iter
  349 ;       jge     short chkperiod1
  350         mov     eax,k                   ; rearranged for speed
  351         cmp     eax,oldcoloriter
  352         jb      short chkperiod1
  353 nonmax1:
  354         mov     eax,k
  355         test    eax,KEYPRESSDELAY
  356         jne     notakey2                ; don't test yet
        call    far ptr keypressed      ; has a key been pressed?
        cmp     ax,0                    ;  ...
        je      notakey2                        ; nope.  proceed
        jmp     keyhit
notakey2:

        dec     k                       ; while (k < maxit) (dec to 0 is faster)
        jnz     kloop           ; while (k < maxit) ...
kloopend1:
        jmp     short kloopend32        ; we done.

chkperiod1:
;       mov     eax,k           ; already done
        test    eax,savedand
        jz      short chksave1
        mov     eax,esi
        xor     eax,savedx
        cmp     eax,lclosenuff
        ja      short nonmax1
        mov     eax,edi
        xor     eax,savedy
        cmp     eax,lclosenuff
        ja      short nonmax1
        mov     period,1                ; note that we have found periodicity
        mov     k,0                     ; pretend maxit reached
        jmp     short kloopend32        ; we done.
chksave1:
        mov     eax,k
        mov     savedx,esi
        mov     savedy,edi
        dec     savedincr               ; time to change the periodicity?
        jnz     short nonmax1           ;  nope.
        shl     savedand,1              ; well then, let's try this one!
  357         inc     savedand                ;  (2**n +1)
  358         mov     ax,nextsavedincr                ; and reset the increment flag
  359         mov     savedincr,ax            ; and reset the increment flag
  360         jmp     short nonmax1
  361 
  362 kloopend32:
  363 
  364         cmp     orbit_ptr,0             ; any orbits to clear?
  365         je      noorbit32               ;  nope.
  366         call    far ptr scrub_orbit     ; clear out any old orbits
  367 noorbit32:
  368 
  369         mov     eax, k          ; set old color
  370         sub     eax,10          ; minus 10, for safety
  371         mov     oldcoloriter,eax        ; and save it as the "old" color
  372         mov     eax,maxit               ; compute color
  373         sub     eax,k                   ;  (first, re-compute "k")
  374         sub     kbdcount,ax             ; adjust the keyboard count (use ax only)
  375         cmp     eax,0                   ; convert any "outlier" region
  376         jg      short coloradjust1_32   ;  (where abs(x) > 2 or abs(y) > 2)
  377         mov     eax,1                   ;   to look like we ran through
  378 coloradjust1_32:                                ;    at least one loop.
  379         mov     realcoloriter,eax     ; result before adjustments
  380         cmp     eax,maxit             ; did we max out on iterations?
  381         jne     short notmax32        ;  nope.
  382         mov     oldcoloriter,eax      ; set "oldcolor" to maximum
  383         cmp     inside,0                ; is "inside" >= 0?
  384         jl      wedone32                ;  nope.  leave it at "maxit"
  385         sub     eax,eax
  386         mov     ax,inside               ; reset max-out color to default
  387         cmp     periodicitycheck,0      ; show periodicity matches?
  388         jge     wedone32                ;  nope.
  389 ;       mov     al,period               ;  reset color to periodicity flag
  390         cmp     period,0
  391         je      wedone32
  392         mov     ax,7                    ; use color 7 (default white)
  393         jmp     short wedone32
  394 
  395 notmax32:
  396         cmp     outside,0               ; is "outside" >= 0?
  397         jl      wedone32                ;   nope. leave as realcolor
  398         sub     eax,eax
  399         mov     ax, outside             ; reset to "outside" color
  400 
  401 wedone32:                             ;
  402         mov     coloriter,eax           ; save the color result
  403         shld    edx,eax,16              ; put result in ax,dx
  404         shr     eax,16
  405         UNFRAME                  ; pop stack frame
  406         ret                             ; and return with color
  407 
  408 
  409 .8086                                   ; 386-specific code ends here
  410 
  411 kloopend:
  412         cmp     orbit_ptr,0             ; any orbits to clear?
  413         je      noorbit2                ;  nope.
  414         call    far ptr scrub_orbit     ; clear out any old orbits
  415 noorbit2:
  416 
  417         mov     ax,word ptr k   ; set old color
  418         mov     dx,word ptr k+2 ; set old color
  419         sub     ax,10                   ; minus 10, for safety
  420         sbb     dx,0
  421         mov     word ptr oldcoloriter,ax        ; and save it as the "old" color
  422         mov     word ptr oldcoloriter+2,dx      ; and save it as the "old" color
  423         mov     ax,word ptr maxit               ; compute color
  424         mov     dx,word ptr maxit+2     ; compute color
  425         sub     ax,word ptr k           ;  (first, re-compute "k")
  426         sbb     dx,word ptr k+2         ;  (first, re-compute "k")
  427         sub     kbdcount,ax                     ; adjust the keyboard count
  428         cmp     dx,0                    ; convert any "outlier" region
  429         js      short kludge_for_julia  ;  k can be > maxit!!!
  430         ja      short coloradjust1      ;  (where abs(x) > 2 or abs(y) > 2)
  431         cmp     ax,0
  432         ja      short coloradjust1      ;  (where abs(x) > 2 or abs(y) > 2)
  433 kludge_for_julia:
  434         mov     ax,1                    ;   to look like we ran through
  435         sub     dx,dx
  436 coloradjust1:                           ;    at least one loop.
  437         mov     word ptr realcoloriter,ax     ; result before adjustments
  438         mov     word ptr realcoloriter+2,dx   ; result before adjustments
  439         cmp     dx,word ptr maxit+2           ; did we max out on iterations?
  440         jne     short notmax            ;  nope.
  441         cmp     ax,word ptr maxit             ; did we max out on iterations?
  442         jne     short notmax            ;  nope.
  443         mov     word ptr oldcoloriter,ax      ; set "oldcolor" to maximum
  444         mov     word ptr oldcoloriter+2,dx    ; set "oldcolor" to maximum
  445         cmp     inside,0                ; is "inside" >= 0?
  446         jl      wedone                  ;  nope.  leave it at "maxit"
  447         mov     ax,inside               ; reset max-out color to default
  448         sub     dx,dx
  449         cmp     periodicitycheck,0      ; show periodicity matches?
  450         jge     wedone                  ;  nope.
  451 ;       sub     ax,ax                   ; clear top half for next
  452 ;       mov     al,period               ;  reset color to periodicity flag
  453 ;        cmp     period,0
  454 ;        jz      wedone
  455         mov     ax,7                    ; use color 7 (default white)
  456         jmp     short wedone
  457 
  458 notmax:
  459         cmp     outside,0               ; is "outside" >= 0?
  460         jl      wedone                  ;   nope. leave as realcolor
  461         mov     ax, outside             ; reset to "outside" color
  462         sub     dx,dx
  463 
  464 wedone:                                 ;
  465         mov     word ptr coloriter,ax   ; save the color result
  466         mov     word ptr coloriter+2,dx   ; save the color result
  467         UNFRAME                  ; pop stack frame
  468         ret                             ; and return with color
  469 
  470 calcmandasm endp
  471 
  472 
  473 ; ******************** Function code16bit() *****************************
  474 ;
  475 ;       Performs "short-cut" 16-bit math where we can get away with it.
  476 ; CJLT has modified it, mostly by preshifting x and y to fg30 from fg29
  477 ; or, since we ignore the lower 16 bits, fg14 from fg13.
  478 ; If this shift overflows we are outside x*x+y*y=2, so have escaped.
  479 ; Also, he commented out several conditional jumps which he calculated could
  480 ; never be taken (e.g. mov ax,si / imul si ;cannot overflow).
  481 
  482 code16bit       proc    near
  483 
  484         mov     si,word ptr x+2         ; use SI for X fg13
  485         mov     di,word ptr y+2         ; use DI for Y fg13
  486 
  487 start16bit:
  488         add     si,si                   ;CJLT-Convert to fg14
  489 ;        jo      end16bit                ;overflows if <-2 or >2
  490         jno     not_end16bit1
  491         jmp     end16bit                ;overflows if <-2 or >2
  492 not_end16bit1:
  493         mov     ax,si                   ; compute (x * x)
  494         imul    si                      ; Answer is fg14+14-16=fg12
  495 ;       cmp     dx,0                    ;CJLT commented out-
  496 ;       jl      end16bit                ;CJLT-  imul CANNOT overflow
  497 ;       mov     cx,32-FUDGEFACTOR       ;CJLT. FUDGEFACTOR=29 is hard coded
  498 loop16bit1:
  499         shl     ax,1                    ;  ...
  500         rcl     dx,1                    ;  ...
  501 ;        jo      end16bit                ;  (oops.  overflow)
  502         jno     not_end16bit2
  503         jmp     end16bit                ;  (oops.  overflow)
  504 not_end16bit2:
  505 ;       loop    loop16bit1              ;CJLT...do it once only. dx now fg13.
  506         mov     bx,dx                   ; save this for a tad
  507 
  508 ;ditto for y*y...
  509 
  510         add     di,di                   ;CJLT-Convert to fg14
  511 ;        jo      end16bit                ;overflows if <-2 or >2
  512         jno     not_end16bit3
  513         jmp     end16bit                ;overflows if <-2 or >2
  514 not_end16bit3:
  515         mov     ax,di                   ; compute (y * y)
  516         imul    di                      ;  ...
  517 ;       cmp     dx,0                    ; say, did we overflow? 
  518 ;       jl      end16bit                ;  (oops.  We done.)
  519 ;       mov     cx,32-FUDGEFACTOR       ; ( / fudge)
  520 ;loop16bit2:
  521         shl     ax,1                    ;  ...
  522         rcl     dx,1                    ;  ...
  523         jo      end16bit                ;  (oops.  overflow)
  524 ;       loop    loop16bit2              ;  ...
  525 
  526         mov     cx,bx                   ; compute (x*x - y*y) / fudge
  527         sub     bx,dx                   ;  for the next iteration
  528 
  529         add     cx,dx                   ; compute (x*x + y*y) / fudge
  530         jo      end16bit                ; bail out if too high
  531 ;       js      end16bit                ;  ...
  532 
  533         cmp     cx,word ptr lm+2        ; while (xx+yy < lm)
  534         jae     end16bit                ;  ...
  535         sub     word ptr k,1    ; while (k < maxit)
  536         sbb     word ptr k+2,0
  537         jnz     notdoneyet
  538         cmp     word ptr k,0
  539         jz      end16bit                ;  we done.
  540 notdoneyet:
  541         mov     ax,di                   ; compute (y * x) fg14+14=fg28
  542         imul    si                      ;  ...
  543 ;       mov     cx,33-FUDGEFACTOR-2     ; ( * 2 / fudge)
  544 ;loop16bit3:
  545         shl     ax,1                    ;  ...
  546         rcl     dx,1                    ;  ...
  547         shl     ax,1                    ;  shift two bits
  548         rcl     dx,1                    ;  cannot overflow as |x|<=2, |y|<=2
  549 ;       loop    loop16bit3              ;  ...
  550         add     dx,word ptr linity+2    ; (2*y*x) / fudge + linity
  551         jo      end16bit                ; bail out if too high
  552         mov     di,dx                   ; save as y
  553 
  554         add     bx,word ptr linitx+2    ; (from above) (x*x - y*y)/fudge + linitx
  555         jo      end16bit                ; bail out if too high
  556         mov     si,bx                   ; save as x
  557 
  558         mov     dx,word ptr oldcoloriter+2      ; recall the old color
  559         cmp     dx,word ptr k+2                 ; check it against this iter
  560         jb      short nonmax3           ;  nope.  bypass periodicity check.
  561         mov     ax,word ptr oldcoloriter        ; recall the old color
  562         cmp     ax,word ptr k                   ; check it against this iter
  563         jb      short nonmax3           ;  nope.  bypass periodicity check.
  564         mov     word ptr x+2,si         ; save x for periodicity check
  565         mov     word ptr y+2,di         ; save y for periodicity check
  566         call    checkperiod             ; check for periodicity
  567 nonmax3:
  568         mov     ax,word ptr k   ; set up to test for key stroke
  569         test    ax,KEYPRESSDELAY
  570         jne     notakey3                ; don't test yet
        push    cx
        push    bx
        call    far ptr keypressed      ; has a key been pressed?
        pop     bx
        pop     cx
        cmp     ax,0                    ;  ...
        je      notakey3                        ; nope.  proceed
        mov     cx,-1
        jmp     short end16bitgotkey    ; cx set, jump to end
notakey3:
        jmp     start16bit              ; try, try again.

end16bit:                               ; we done.
        xor     cx,cx                   ; no key so zero cx
end16bitgotkey:                         ; jump here if key
        ret
code16bit       endp


;       The following routine checks for periodic loops (a known
;       method of decay inside "Mandelbrot Lake", and an easy way to
;       bail out of "lake" points quickly).  For speed, only the
;       high-order sixteen bits of X and Y are checked for periodicity.
;       For accuracy, this routine is only fired up if the previous pixel
;       was in the lake (which means that the FIRST "wet" pixel was
;       detected by the dull-normal maximum iteration process).

checkperiod     proc near               ; periodicity check
        mov     ax,word ptr k   ; set up to test for save-time
        test    ax,word ptr savedand    ; save on 0, check on anything else
        jnz     notimeyet               ;  NOT time to save a new "old" value
        mov     dx,word ptr k+2 ; set up to test for save-time
        test    dx,word ptr savedand+2  ; save on 0, check on anything else
        jz      checksave               ;  time to save a new "old" value
notimeyet:
        mov     dx,word ptr x+2         ; load up x
        xor     dx,word ptr savedx+2
        cmp     dx,word ptr lclosenuff+2
        ja      checkdone
        mov     ax,word ptr x           ; load up x
        xor     ax,word ptr savedx
        cmp     ax,word ptr lclosenuff
        ja      checkdone
        mov     dx,word ptr y+2         ; load up y
        xor     dx,word ptr savedy+2
        cmp     dx,word ptr lclosenuff+2
        ja      checkdone
        mov     ax,word ptr y           ; load up y
        xor     ax,word ptr savedy
        cmp     ax,word ptr lclosenuff
        ja      checkdone
        mov     period,1                ; note that we have found periodicity
        mov     word ptr k,1    ; pretend maxit reached
        mov     word ptr k+2,0  ; pretend maxit reached
checksave:
        mov     dx,word ptr x+2         ; load up x
        mov     word ptr savedx+2,dx    ;  and save it
        mov     ax,word ptr x           ; load up x
        mov     word ptr savedx,ax      ;  and save it
        mov     dx,word ptr y+2         ; load up y
        mov     word ptr savedy+2,dx    ;  and save it
        mov     ax,word ptr y           ; load up y
        mov     word ptr savedy,ax      ;  and save it
        dec     savedincr               ; time to change the periodicity?
        jnz     checkdone               ;  nope.
        shl     word ptr savedand,1     ; well then, let's try this one!
  571         rcl     word ptr savedand+2,1           ; well then, let's try this one!
        add     word ptr savedand,1             ;  (2**n +1)
        adc     word ptr savedand+2,0           ;  (2**n +1)
        mov     ax,nextsavedincr                ; and reset the increment flag
        mov     savedincr,ax            ; and reset the increment flag
checkdone:
        ret                             ; we done.
checkperiod     endp


; ******************** Function code32bit() *****************************
;
;       Perform the 32-bit logic required using 16-bit logic
;
;       New twice as fast logic,
;          Courtesy of Bill Townsend and Mike Gelvin (CIS:73337,520)
;       Even newer, faster still by Chris Lusby Taylor
;        who noted that we needn't square the low word if we first multiply
  572 ;        by 4, since we only need 29 places after the point and this will
  573 ;        give 30. (We divide answer by two to give 29 bit shift in answer)
  574 ;       Also, he removed all testing for special cases where a word of operand
  575 ;       happens to be 0, since testing 65536 times costs more than the saving
  576 ;       1 time out of 65536! (I benchmarked it. Just removing the tests speeds
  577 ;       us up by 3.)
  578 ;
  579 ;Note that square returns DI,AX squared in DX,AX now.
  580 ; DI,AX is first converted to unsigned fg31 form.
  581 ; (For its square to be representable in fg29 (range -4..+3.999)
  582 ; DI:AX must be in the range 0..+1.999 which fits neatly into unsigned fg31.)
  583 ; This allows us to ignore the part of the answer corresponding to AX*AX as it
  584 ; is less than half a least significant bit of the final answer.
  585 ; I thought you'd like that.
;
; As we prescaled DI:AX, we need to shift the answer 1 bit to the right to
; end up in fg29 form since 29=(29+2)+(29+2)-32-1
; However, the mid term AX*DI is needed twice, so the shifts cancel.
;
; Since abs(x) and abs(y) in fg31 form will be needed in calculating 2*X*Y
; we push them onto the stack for later use.

; Note that square does nor affect bl,si,bp
; and leaves highword of argument in di
; but destroys bh,cx
square  MACRO   donepops
        LOCAL   notneg
        shl     ax,1            ;Multiply by 2 to convert to fg30
        rcl     di,1            ;If this overflows DI:AX was negative
        jnc     notneg
        not     ax                      ; so negate it
        not     di                      ; ...
        add     ax,1                    ; ...
        adc     di,0                    ; ...
        not     bl                      ; change negswt
notneg: shl     ax,1            ;Multiply by 2 again to give fg31
        rcl     di,1            ;If this gives a carry then DI:AX was >=2.0
                                ;If its high bit is set then DI:AX was >=1.0
                                ;This is OK, but note that this means that
                                ;DI:AX must now be treated as unsigned.
        jc      donepops
        push    di              ; save y or x (in fg31 form) on stack
        push    ax              ; ...
        mul     di              ;GET MIDDLE PART - 2*A*B
        mov     bh,ah           ;Miraculously, it needs no shifting!
        mov     cx,dx
        mov     ax,di
        mul     ax              ;SQUARE HIGH HWORD - A*A
        shl     bh,1            ;See if we round up
        adc     ax,1            ;Anyway, add 1 to round up/down accurately
        adc     dx,0
        shr     dx,1            ;This needs shifting one bit
        rcr     ax,1
        add     ax,cx           ;Add in the 2*A*B term
        adc     dx,0
        ENDM    ;#EM


code32bit       proc near
;
; BL IS USED FOR THE "NEGSWT" FLAG
;   NEGSWT IS ONE IF EITHER "X" OR "Y" ARE NEGATIVE, BUT NOT BOTH NEGATIVE
;

        push    bp
        xor     bl,bl                   ; NEGSWT STARTS OFF ZERO

;       iteration loop

nextit: mov     ax,word ptr y           ; ax=low(y)
        mov     di,word ptr y+2         ; di=high(y)
        square done1    ;square y and quit via done1 if it overflows
        mov     si,ax           ; square returns results in dx,ax
        mov     bp,dx           ; save y*y in bp,si
        mov     ax,word ptr x
        mov     di,word ptr x+2
        square  done2           ; square x and quit via done2 if it overflows
        mov     cx,ax           ; Save low answer in cx.
        ADD     ax,si           ; calc y*y + x*x
        mov     ax,bp
        ADC     ax,dx           ;  ...
        jno     nextxy          ; overflow?
                                ;NOTE: The original code tests against lm
                                ;here, but as lm=4<<29 this is the same
                                ;as testing for signed overflow
done4:  add     sp,4            ; discard saved value of |x| fg 31
done2:  add     sp,4            ; discard saved value of |y| fg 31
done1:  xor     cx,cx           ; no key exit, zero cx
done0:                          ; exit here if key hit
        pop     bp              ; restore saved bp
        ret

;---------------------------------------------------------------------------

nextxy: sub     word ptr k,1    ; while (k < maxit)
        sbb     word ptr k+2,0
        jnz     tryagain
        cmp     word ptr k,0
        jz      done4           ;  we done.
tryagain:
        sub     cx,si                   ; subtract y*y from x*x
        sbb     dx,bp                   ;  ...
        add     cx,word ptr linitx      ; add "A"
        adc     dx,word ptr linitx+2    ;  ...
        jo      done4                   ;CJLT-Must detect overflow here
                                        ; but increment loop count first
        mov     word ptr x,cx           ; store new x = x*x-y*y+a
        mov     word ptr x+2,dx         ;  ...

; now calculate x*y
;
;More CJLT tricks here. We use the pushed abs(x) and abs(y) in fg31 form
;which, when multiplied, give x*y in fg30, which, luckily, is the same as...
;2*x*y fg29.
;As with squaring, we can ignore the product of the low order words, and still
;be more accurate than the original algorithm.
;
        pop     bp              ;Xlow
        pop     di              ;Xhigh (already there, actually)
        pop     ax              ;Ylow
        mul     di              ;Xhigh * Ylow
        mov     bh,ah           ;Discard lowest 8 bits of answer
        mov     cx,dx
        pop     ax              ;Yhigh
        mov     si,ax           ; copy it
        mul     bp              ;Xlow * Yhigh
        xor     bp,bp           ;Clear answer
        add     bh,ah
        adc     cx,dx
        adc     bp,0
        mov     ax,si           ;Yhigh
        mul     di              ;Xhigh * Yhigh
        shl     bh,1            ;round up/down
        adc     ax,cx           ;Answer-low
        adc     dx,bp           ;Answer-high
                                ;NOTE: The answer is 0..3.9999 in fg29
        js      done1           ;Overflow if high bit set
        or      bl,bl           ; ZERO IF NONE OR BOTH X , Y NEG
        jz      signok          ; ONE IF ONLY ONE OF X OR Y IS NEG
        not     ax              ; negate result
        not     dx              ;  ...
        add     ax,1            ;  ...
        adc     dx,0            ;  ...
        xor     bl,bl           ;Clear negswt
signok:
        add     ax,word ptr linity
        adc     dx,word ptr linity+2    ; dx,ax = 2(X*Y)+B
        jo      done1
        mov     word ptr y,ax           ; save the new value of y
        mov     word ptr y+2,dx         ;  ...
        mov     dx,word ptr oldcoloriter+2      ; recall the old color
        cmp     dx,word ptr k+2         ; check it against this iter
        jb      short chkkey4           ;  nope.  bypass periodicity check.
        mov     ax,word ptr oldcoloriter        ; recall the old color
        cmp     ax,word ptr k           ; check it against this iter
        jb      short chkkey4           ;  nope.  bypass periodicity check.
        call    checkperiod             ; check for periodicity

chkkey4:
        mov     ax,word ptr k   ; set up to test for key stroke
        test    ax,KEYPRESSDELAY
        jne     notakey4                ; don't test yet
  586         push    cx
  587         push    bx
  588         call    far ptr keypressed      ; has a key been pressed?
  589         pop     bx
  590         pop     cx
  591         cmp     ax,0                    ;  ...
  592         je      notakey4                ; nope.  proceed
  593         mov     cx,-1
  594         jmp     done0                   ; cx set, jump to very end
  595 notakey4:
  596 
  597 chkmaxit:
  598         cmp     show_orbit,0            ; orbiting on?
  599         jne     horbit                  ;  yep.
  600         jmp     nextit                  ;go around again
  601 
  602 horbit: push    bx                      ; save my flags
  603         mov     ax,-1                   ; color for plot orbit
  604         push    ax                      ;  ...
  605         push    word ptr y+2            ; co-ordinates for plot orbit
  606         push    word ptr y              ;  ...
  607         push    word ptr x+2            ;  ...
  608         push    word ptr x              ;  ...
  609         call    far ptr iplot_orbit     ; display the orbit
  610         add     sp,5*2                  ; clear out the parameters
  611         pop     bx                      ; restore flags
  612         jmp     nextit                  ; go around again
  613 
  614 code32bit       endp
  615 
  616            end
  617