File: dos\calmanp5.asm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; calmanp5.asm - pentium floating point version of the calcmand.asm file
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; This code started from calmanfp.asm as a base.  This provided the code that
; takes care of the overhead that is needed to interface with the Fractint
; engine.  The initial pentium optimizations were provided by Daniele
; Paccaloni back in March of 1995.  For whatever reason, these optimizations
; didn't make it into version 19.6, which was released in May of 1997.  In
; July of 1997, Tim Wegner brought to my attention an article by Agner Fog
; titled "Pentium Optimizations".  This article can currently be found at:

; http://www.azillionmonkeys.com/qed/p5opt.html

; It's a good article that claims to compare the Mandelbrot FPU code similar
; to what is in Fractint with his (and others') pentium optimized code.  The
; only similarity I was able to find was they both calculate the Mandelbrot
; set.  Admittedly, the Fractint FPU Mandelbrot code was not optimized for a
; pentium.  So, taking the code segments provided by Agner Fog, Terje Mathisen,
; Thomas Jentzsch, and Damien Jones, I set out to optimize Fractint's FPU
; Mandelbrot code.  Unfortunately, it is not possible to just drop someone
; else's Mandelbrot code into Fractint.  I made good progress, but lost
; interest after several months.
   14 
; In April of 1998, Rees Acheson (author of MANDELB), contacted me about
; including his pentium optimized Mandelbrot code in the next release of
; Fractint.  This started a flurry of correspondence resulting in
; faster code in Fractint and faster code in MANDELB.  His code didn't
; drop right in, but his input and feedback are much appreciated.  The
; code in this file is largely due to his efforts.

; July 1998, Jonathan Osuch
;
; Updated 10 Oct 1998 by Chuck Ebbert (CAE) -- about 5% speed gain on a P133
;   (the exact percentage is garbled in this copy of the file)
; Fixed keyboard/periodicity conflict JCO  10 DEC 1999
;



;                        required for compatibility if Turbo ASM
IFDEF ??version
MASM51
QUIRKS
ENDIF

.MODEL medium,c

.486

; external functions
EXTRN   keypressed:FAR          ; this routine is in 'general.asm'
EXTRN   getakey:FAR             ; this routine is in 'general.asm'
EXTRN   plot_orbit:FAR          ; this routine is in 'fracsubr.c'
EXTRN   scrub_orbit:FAR         ; this routine is in 'fracsubr.c'

; external data
EXTRN init:WORD                 ; declared as type complex
EXTRN parm:WORD                 ; declared as type complex
EXTRN new:WORD                  ; declared as type complex
EXTRN maxit:DWORD
EXTRN inside:WORD
EXTRN outside:WORD
EXTRN rqlim:QWORD               ; bailout (I never did figure out
                                ;   what "rqlim" stands for. -Wes)
EXTRN coloriter:DWORD
EXTRN oldcoloriter:DWORD
EXTRN realcoloriter:DWORD
EXTRN periodicitycheck:WORD
EXTRN reset_periodicity:WORD
EXTRN closenuff:QWORD
EXTRN fractype:WORD             ; Mandelbrot or Julia
EXTRN kbdcount:WORD            ; keyboard counter
EXTRN dotmode:WORD
EXTRN show_orbit:WORD           ; "show-orbit" flag
EXTRN orbit_ptr:WORD            ; "orbit pointer" flag
EXTRN magnitude:QWORD           ; when using potential
extrn   nextsavedincr:word              ; for incrementing AND value
extrn   firstsavedand:dword             ; AND value
extrn   bad_outside:word        ; old FPU code with bad: real,imag,mult,summ
extrn   save_release:word
extrn   showdot:WORD
extrn   orbit_delay:WORD
extrn   atan_colors:word

JULIAFP  EQU 6                  ; from FRACTYPE.H
MANDELFP EQU 4
KEYPRESSDELAY equ 16383         ; 3FFFh

; Text macros naming the two halves of the complex externals so they can be
; used directly as FPU memory operands (fld initx, fadd parmy, fstp newy ...).
; The externals are declared as WORD, so an explicit qword override is needed.
; NOTE(review): the +8 offsets assume "complex" is two consecutive 8-byte
; doubles, x (real) first and y (imaginary) second -- confirm against the
; C declaration of the type.
initx    EQU <qword ptr init>   ; just to make life easier
inity    EQU <qword ptr init+8>
parmx    EQU <qword ptr parm>
parmy    EQU <qword ptr parm+8>
newx     EQU <qword ptr new>
newy     EQU <qword ptr new+8>

.DATA
EVEN
orbit_real              DQ  ?           ; x passed to plot_orbit
orbit_imag              DQ  ?           ; y passed to plot_orbit
round_down_half         DD  0.5         ; subtracted before FISTP on the bad_outside paths
tmp_dword               DD  ?           ; scratch for FISTP/FIMUL integer transfers
inside_color            DD  ?           ; set once per image by calcmandfpasmstart_p5
periodicity_color       DD  7           ; set once per image by calcmandfpasmstart_p5

; The periodicity counter and mask live in registers, not in memory.
savedincr               EQU     DI      ; iterations left until the mask is lengthened
savedand_p5             EQU     EDX     ; AND mask deciding when to save a new reference point

calmanp5_text SEGMENT PARA PUBLIC USE16 'CODE'
ASSUME cs:calmanp5_text
ALIGN 16
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; This routine is called once per image.
; Put things here that won't change from one pixel to the next.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
PUBLIC calcmandfpasmstart_p5
calcmandfpasmstart_p5   PROC
; Per-image setup: derive inside_color and periodicity_color from the
; 'inside' and 'periodicitycheck' settings, and switch periodicity
; checking off for the first pixel.  Clobbers EAX and flags.

        movzx   eax,inside              ; eax = inside, upper half cleared
        test    ax,ax                   ; negative 'inside' setting?
        jns     keep_inside             ;  no, use it as the color
        mov     eax,maxit               ;  yes, use maxit as inside_color

keep_inside:
        mov     inside_color,eax

        cmp     periodicitycheck,0      ; periodicitycheck < 0 ?
        jge     keep_periodicity_color  ;  no, reuse inside_color (still in eax)
        mov     eax,7                   ;  yes, use color 7 (default white)
keep_periodicity_color:
        mov     periodicity_color,eax
        mov     oldcoloriter,0          ; no periodicity checking on 1st pixel
        ret
calcmandfpasmstart_p5       ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; pentium floating point version of calcmandasm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-----------------------------------------------------------------------
; calcmandfpasm_p5 - iterate one pixel (Mandelbrot or Julia) on the FPU.
;
; In:    globals init, parm, rqlim, maxit, fractype, closenuff,
;        periodicitycheck, reset_periodicity, oldcoloriter, kbdcount,
;        show_orbit, outside, inside_color, periodicity_color
; Out:   coloriter and DX:AX = resulting color, or -1 if a key was hit
;        realcoloriter, oldcoloriter, new (last x,y) are updated;
;        magnitude is written on the bailout path only
; Clobb: EAX, EBX, ECX, EDX, flags, FPU stack (left empty on return)
;
; SI/DI are used as working registers, so they are saved with USES.
; NOTE(review): this assumes the C caller expects SI/DI preserved, as
; 16-bit C compilers normally do -- confirm against the caller.
;-----------------------------------------------------------------------
PUBLIC calcmandfpasm_p5
calcmandfpasm_p5  PROC USES esi edi
ALIGN 16
LOCAL  savedx_p5:QWORD, savedy_p5:QWORD
LOCAL  closenuff_p5:QWORD

; Register usage:  eax: FPU status word / scratch
;                  ebx: oldcoloriter    ecx: counter (counts DOWN from maxit)
;                  edx: savedand_p5     di:  savedincr
;                  si:  0ffffh (mask for the in-loop keyboard check)
ALIGN 16
;    {Set up FPU stack for quick access while in the loop}
; initialization stuff
        sub     eax,eax                 ; clear eax
        cmp     periodicitycheck,0      ; periodicity checking?
        je      initoldcolor            ;  no, set oldcolor 0 to disable it
        cmp     reset_periodicity,0     ; periodicity reset?
        je      initparms               ;  no, inherit oldcolor from prior invocation
        mov     eax,maxit               ; yup.  reset oldcolor to maxit-250
        sub     eax,250                 ; (avoids slowness at high maxits)

initoldcolor:
        mov     oldcoloriter,eax        ; reset oldcolor

initparms:
        fld     closenuff
        fstp    closenuff_p5            ; keep a copy of closenuff in the stack frame
        fldz
        mov     orbit_ptr,0             ; clear orbits
        fst     savedx_p5               ; savedx = 0.0
        fstp    savedy_p5               ; savedy = 0.0

; The EDX/EDI/ESI working registers are loaded at 'nokey', i.e. after the
; far calls below, so that keypressed/getakey cannot disturb them.
        dec     kbdcount                ; decrement the keyboard counter
        jns     nokey                   ;  skip keyboard test if still positive
        mov     kbdcount,10             ; stuff in a low kbd count
        cmp     show_orbit,0            ; are we showing orbits?
        jne     quickkbd                ;  yup.  leave it that way.
        cmp     orbit_delay,0           ; are we delaying orbits?
        je      slowkbd                 ;  nope.  change it.
        cmp     showdot,0               ; are we showing the current pixel?
        jge     quickkbd                ;  yup.  leave it that way.
;this may need to be adjusted, I'm guessing at the "appropriate" values -Wes
slowkbd:
        mov     kbdcount,5000           ; else, stuff an appropriate count val
        cmp     dotmode,11              ; disk video?
        jne     quickkbd                ;  no, leave as is
        shr     kbdcount,2              ; yes, reduce count

quickkbd:
        call    far ptr keypressed      ; has a key been pressed?
        cmp     ax,0                    ;  ...
        je      nokey                   ; nope.  proceed
        mov     kbdcount,0              ; make sure it goes negative again
        cmp     ax,'o'                  ; orbit toggle hit?
        je      orbitkey                ;  yup.  show orbits
        cmp     ax,'O'                  ; orbit toggle hit?
        jne     keyhit                  ;  nope.  normal key.
orbitkey:
        call    far ptr getakey         ; read the key for real
        mov     eax,1                   ; reset orbittoggle = 1 - orbittoggle
        sub     ax,show_orbit           ;  ...
        mov     show_orbit,ax           ;  ...
        jmp     short nokey             ; pretend no key was hit
keyhit:
; Reached from here (FPU stack empty) and from DoKeyCheck inside the loop
; (FPU stack in use); fninit discards whatever is on the FPU stack.
        fninit
        mov     eax,-1                  ; return with -1
        mov     coloriter,eax           ; set color to -1
        mov     edx,eax                 ; put results in ax,dx (dx is all 1's too)
        ret                             ; bail out!
nokey:
        mov     savedand_p5,1           ; edx = savedand_p5 = 1
        mov     edi,savedand_p5         ; savedincr (di) = 1
        mov     esi,0FFFFh              ; mask for "test cx,si" in the loop

        mov     ecx,maxit               ; initialize counter
        mov     ebx,oldcoloriter

        cmp     fractype,JULIAFP        ; julia or mandelbrot set?
        je      dojulia_p5              ; julia set - go there

; Mandelbrot _p5 initialization of stack
        dec     ecx                     ;  always do one already
                                        ; the fpu stack is shown below
                                        ; st(0) ... st(7)

        fld     initx                   ; Cx
        fld     inity                   ; Cy Cx
        fld     rqlim                   ; b Cy Cx
        fld     st(2)                   ; Cx b Cy Cx
        fadd    parmx                   ; Px+Cx b Cy Cx
        fld     st(0)                   ; Px+Cx Px+Cx b Cy Cx
        fmul    st,st                   ; (Px+Cx)^2 Px+Cx b Cy Cx
        fld     st(3)                   ; Cy (Px+Cx)^2 Px+Cx b Cy Cx
        fadd    parmy                   ; Py+Cy (Px+Cx)^2 Px+Cx b Cy Cx
        jmp     bottom_of_dojulia_p5

EVEN
DoKeyCheck:
; In-loop keyboard poll.  EAX holds the pending FPU status word and
; ECX/EBX/EDX hold loop state, so all four are saved around the far call.
        push    eax
        push    ecx
        push    ebx
        push    edx                     ; savedand_p5 is live across the call
        call    far ptr keypressed      ; has a key been pressed?
        pop     edx
        pop     ebx
        pop     ecx
        or      ax,ax                   ; (pops above do not change flags)
        je      SkipKeyCheck            ; nope.  proceed
        pop     eax
        jmp     keyhit

ALIGN 16
save_new_old_value:
        fld     st(2)                   ; y y^2 x^2 y x b Cy Cx
        fstp    savedy_p5               ; y^2 x^2 y x b Cy Cx
        fld     st(3)                   ; x y^2 x^2 y x b Cy Cx
        fstp    savedx_p5               ; y^2 x^2 y x b Cy Cx
        dec     savedincr               ; time to lengthen the periodicity?
        jnz     JustAfterFnstsw         ; if not 0, then skip
        lea     savedand_p5,[savedand_p5*2+1] ; savedand = (savedand * 2) + 1
        mov     savedincr,nextsavedincr ; and restart counter

        test    cx,si                   ; low 16 bits of the loop count all zero?
        jz      DoKeyCheck              ;  yes, poll the keyboard
        jmp     JustAfterFnstsw

SkipKeyCheck:
        pop     eax                     ; restore the pending FPU status word
        jmp     JustAfterFnstsw

ALIGN 16
do_check_p5_fast:
; REMEMBER, the cx counter is counting BACKWARDS from maxit to 0
                                        ; fpu stack is
                                        ; y2 x2 y x b Cy Cx
        fld     savedx_p5               ; savedx y2 x2 y x ...
        fsub    st(0),st(4)             ; savedx-x y2 x2 y x ...
        fabs                            ; |x-savedx| y2 x2 y x ...
        fcomp   closenuff_p5            ; y2 x2 y x ...
        push    ax                      ; push AX (bailout compare result) for later
        fnstsw  ax
        and     ah,41h                  ; if |x-savedx| > closenuff
        jz      per_check_p5_ret_fast   ; we're done
        fld     savedy_p5               ; savedy y2 x2 y x ...
        fsub    st(0),st(3)             ; savedy-y y2 x2 y x ...
        fabs                            ; |y-savedy| y2 x2 y x ...
        fcomp   closenuff_p5            ; y2 x2 y x ...
        fnstsw  ax
        and     ah,41h                  ; if |y-savedy| > closenuff
        jz      per_check_p5_ret_fast   ; we're done
                                        ; caught a cycle!!!
        pop     ax                      ; undo push
        fcompp                          ; pop off y2 and x2, leaving y x ...
        mov     eax,maxit
        mov     oldcoloriter,-1         ; check periodicity immediately next time
        mov     realcoloriter,eax       ; save unadjusted realcolor as maxit
        mov     eax,periodicity_color   ; set color
        jmp     overiteration_p5


dojulia_p5:

                                        ; Julia p5 initialization of stack
                                        ; note that init and parm are "reversed"
        fld     parmx                   ; Cx
        fld     parmy                   ; Cy Cx
        fld     rqlim                   ; b Cy Cx

        fld     initx                   ; x b Cy Cx
        fld     st                      ; x x b Cy Cx
        fmul    st,st                   ; x^2 x b Cy Cx
        fld     inity                   ; y x^2 x b Cy Cx

bottom_of_dojulia_p5:
        fmul    st(2),st                ; y x^2 xy b Cy Cx
        fmul    st,st                   ; y^2 x^2 xy b Cy Cx

        fsub                            ; x^2-y^2 xy b Cy Cx
        fadd    st,st(4)                ; x^2-y^2+Cx xy b Cy Cx
        fxch                            ; xy x^2-y^2+Cx b Cy Cx

        fadd    st,st                   ; 2xy x^2-y^2+Cx b Cy Cx
        fadd    st,st(3)                ; 2xy+Cy x^2-y^2+Cx b Cy Cx

; first iteration complete
; {FPU stack all set, we're ready for the start of the loop}
EVEN
LoopStart:

; {While (Sqr(x) + Sqr(y) < b) and (Count < MaxIterations) do}
;    {square both numbers}
        fld     st(1)                   ;  {x, y, x, b, Cy, Cx}
        fmul    st(0),st(0)             ;  {x^2, y, x, b, Cy, Cx}
        fld     st(1)                   ;  {y, x^2, y, x, b, Cy, Cx}
        fmul    st(0),st(0)             ;  {y^2, x^2, y, x, b, Cy, Cx}

;    {add both squares and leave at top of stack ready for the compare}
        fld     st(1)                   ;  {x^2, y^2, x^2, y, x, b, Cy, Cx}
        fadd    st(0),st(1)             ;  {(y^2)+(x^2), y^2, x^2, y, x, b, Cy, Cx}
;    {Check to see if (x^2)+(y^2) < b and discard (x^2)+(y^2)}
        fcomp   st(5)                   ;  {y^2, x^2, y, x, b, Cy, Cx}

        cmp     ecx,ebx                 ; ebx = oldcoloriter
        jae     SkipTasks               ; don't check periodicity

        fnstsw  ax                      ;Get the pending NPX info into AX

        test    ecx,savedand_p5         ; save on 0, check on anything else
        jnz     do_check_p5_fast        ;  nonzero: compare with the saved value
        jmp     save_new_old_value      ;  zero: time to save a new "old" value
EVEN
per_check_p5_ret_fast:

        pop     ax              ;pop AX to continue with the FCOMP test
        jz      short JustAfterFnstsw ;test that got us here, & pairable
        jmp     short JustAfterFnstsw ;since we have done the FNSTSW,
                                ; Skip over next instruction
EVEN
SkipTasks:

        fnstsw ax             ;  {Store the NPX status word in AX, no FWAIT}

JustAfterFnstsw:

;  {FPU stack again has all the required elements for terminating the loop }
;  {Continue with the FCOMP test.}
;  {The following does the same as SAHF; JA @LoopEnd; but in 3 fewer cycles}
        shr     ah,1            ; {Shift right, shifts low bit into carry flag }
        jnc     short overbailout_p5  ; {Jmp if not carry.  Do while waiting for FPU }

;  {Temp = Sqr(x) - Sqr(y) + Cx}  {Temp = Newx}
;    {Subtract y^2 from Cx ...}
        fsubr   st(0),st(6)             ;  {Cx-y^2, x^2, y, x, b, Cy, Cx}

;  CAE changed this around for Pentium, 10 Oct 1998
;  exchange this pending result with y so there's no wait for fsubr to finish
        fxch    st(2)                   ; {y, x^2, Cx-y^2, x, b, Cy, Cx}

;  now compute x*y while the above fsubr is still running
        fmulp   st(3),st                ; {x^2, Cx-y^2, xy, b, Cy, Cx}

;    {... then add x^2 to Cx-y^2}
        faddp   st(1),st(0)             ; {Newx, xy, b, Cy, Cx}

;    {Place the temp (Newx) in the x slot ready for next time in the
;     loop, while placing xy in ST(0) to use below.}
        fxch    st(1)                   ;  {xy, Newx, b, Cy, Cx}

; {y = (y * x * 2) + Cy   (Use old x, not temp)}
;    {multiply y * x was already done above so it was removed here -- CAE}


;    {Now multiply x*y by 2 (add ST to ST)}
        fadd    st,st(0)                ;  {x*y*2, Newx, b, Cy, Cx}

;  compare was moved down so it would run concurrently with above add -- CAE
        cmp     show_orbit,0            ; is show_orbit clear

;    {Finally, add Cy to x*y*2}
        fadd    st(0),st(3)              ;  {Newy, Newx, b, Cy, Cx}

        jz      no_show_orbit_p5         ; if so then skip
        call    near ptr show_orbit_xy_p5  ; y x b Cy Cx
EVEN
no_show_orbit_p5:

        dec     ecx
        jnz     LoopStart
EVEN
LoopEnd:                                ;  {Newy, Newx, b, Cy, Cx}

; reached maxit, inside
        mov     eax,maxit
        mov     oldcoloriter,-1        ; check periodicity immediately next time
        mov     realcoloriter,eax      ; save unadjusted realcolor
        mov     eax,inside_color
        jmp     short overiteration_p5
EVEN
overbailout_p5:

        fadd                            ; x^2+y^2 y x b Cy Cx
        mov     eax,ecx
        fstp    magnitude               ; y x b Cy Cx
        sub     eax,10                  ; 10 more next time before checking

        jns     no_fix_underflow_p5
; if the number of iterations was within 10 of maxit, then subtracting
; 10 would underflow and cause periodicity checking to start right
; away.  Catching a period doesn't occur as often in the pixels at
; the edge of the set anyway.
        sub     eax,eax                 ; don't check next time
no_fix_underflow_p5:
        mov     oldcoloriter,eax        ; check when past this - 10 next time
        mov     eax,maxit
        sub     eax,ecx                 ; leave 'times through loop' in eax

; zero color fix
        jnz     zero_color_fix_p5
        inc     eax                     ; if (eax == 0 ) eax = 1
zero_color_fix_p5:
        mov     realcoloriter,eax       ; save unadjusted realcolor
        sub     kbdcount,ax             ; adjust the keyboard count

        cmp     outside,-1              ; iter ? (most common case)
        je      overiteration_p5
        cmp     outside,-2              ; outside <= -2 ?
        jle     to_special_outside_p5   ; yes, go do special outside options
        sub     eax,eax                 ; clear top half of eax for next
        mov     ax,outside              ; use outside color
        jmp     short overiteration_p5

to_special_outside_p5:

        call    near ptr special_outside_p5
EVEN
overiteration_p5:
; Common exit: eax = color, FPU stack = y x b Cy Cx.

        fstp    newy                    ; x b Cy Cx
        fstp    newx                    ; b Cy Cx


;    {Pop 3 used registers from FPU stack, discarding the values.
;       All we care about is ECX, the count.}

        fcompp
        fstp    st
        mov     coloriter,eax

        cmp     orbit_ptr,0             ; any orbits to clear?
        je      calcmandfpasm_ret_p5    ; nope.
        call    far ptr scrub_orbit     ; clear out any old orbits
        mov     eax,coloriter           ; restore color
                                        ; speed not critical here in orbit land

calcmandfpasm_ret_p5:

        mov     edx,eax       ;     {The low 16 bits already in AX}
        shr     edx,16        ;     {Shift high 16 bits to low 16 bits position}

        ret

calcmandfpasm_p5   ENDP

ALIGN 16
;-----------------------------------------------------------------------
; show_orbit_xy_p5 - plot the current orbit point through plot_orbit().
;
; In:    FPU stack = y x b Cy Cx
; Out:   FPU stack restored to y x b Cy Cx
; Clobb: AX, flags; orbit_real / orbit_imag are overwritten.
;        EBX, ECX, EDX, ESI, EDI are preserved by USES.
;-----------------------------------------------------------------------
show_orbit_xy_p5   PROC NEAR USES ebx ecx edx esi edi
IFDEF @Version        ; MASM
IF @Version lt 600
        local   tmp_ten_byte_0:tbyte    ; stupid klooge for MASM 5.1 LOCAL bug
ENDIF
ENDIF
        local   tmp_ten_byte_1:tbyte
        local   tmp_ten_byte_2:tbyte
        local   tmp_ten_byte_3:tbyte
        local   tmp_ten_byte_4:tbyte
        local   tmp_ten_byte_5:tbyte
; USES is needed because in all likelihood, plot_orbit surely
; uses these registers.  It's ok to have to push/pop's here in the
; orbits as speed is not crucial when showing orbits.

                                        ; fpu stack on entry is
                                        ; y x b Cy Cx
        fld     st(1)                   ; x y x b Cy Cx

        fstp    orbit_real              ; y x b Cy Cx   (orbit_real = x)
        fst     orbit_imag              ; y x b Cy Cx   (orbit_imag = y)
        mov     ax,-1                   ; color for plot orbit
        push    ax                      ;       ... pushed first (last argument)
; since the number fpu registers that plot_orbit() preserves is compiler
; dependant, it's best to fstp the entire stack into 10 byte memories
; and fld them back after plot_orbit() returns.
        fstp    tmp_ten_byte_1          ; store the stack in 80 bit form
        fstp    tmp_ten_byte_2
        fstp    tmp_ten_byte_3
        fstp    tmp_ten_byte_4
        fstp    tmp_ten_byte_5
        fwait                           ; just to be safe
        push    dword ptr orbit_imag+4  ; imaginary co-ordinate, high dword
        push    dword ptr orbit_imag    ;       ... low dword
        push    dword ptr orbit_real+4  ; real co-ordinate, high dword
        push    dword ptr orbit_real    ;       ... low dword
        call    far ptr plot_orbit      ; display the orbit
        add     sp,9*2                  ; clear out the parameters:
                                        ;   two 8-byte doubles + one word = 9 words

        fld     tmp_ten_byte_5          ; reload in reverse order of the stores
        fld     tmp_ten_byte_4
        fld     tmp_ten_byte_3
        fld     tmp_ten_byte_2
        fld     tmp_ten_byte_1
        fwait                           ; just to be safe
        ret
show_orbit_xy_p5   ENDP

  230 ALIGN 16
  231 special_outside_p5 PROC NEAR
  232 ; When type casting floating point variables to integers in C, the decimal
  233 ; is truncated.  When using FIST in asm, the value is rounded.  Using
  234 ; "FSUB round_down_half" causes the values to be rounded down.
  235 ; Boo-Hiss if values are negative, change FPU control word to truncate values.
  236 LOCAL Control:word
  237         fstcw Control
  238         push  Control                       ; Save control word on the stack
  239         or    Control, 0000110000000000b
  240         fldcw Control                       ; Set control to round towards zero
  241 
  242         cmp     outside,-2
  243         jne     short not_real
  244 
  245         fld     st(1)                  ; newx
  246         test    bad_outside,1h
  247         jz      over_bad_real
  248         fsub    round_down_half
  249         jmp     over_good_real
  250 over_bad_real:
  251         frndint
  252 over_good_real:
  253         fistp   tmp_dword
  254         add     eax,7
  255         add     eax,tmp_dword
  256         jmp     check_color
  257 not_real:
  258         cmp     outside,-3
  259         jne     short not_imag
  260         fld     st(0)            ; newy
  261         test    bad_outside,1h
  262         jz      short over_bad_imag
  263         fsub    round_down_half
  264         jmp     short over_good_imag
  265 over_bad_imag:
  266         frndint
  267 over_good_imag:
  268         fistp   tmp_dword
  269         add     eax,7
  270         add     eax,tmp_dword
  271         jmp     check_color
  272 not_imag:
  273         cmp     outside,-4
  274         jne     short not_mult
  275         push    ax              ; save current ax value
  276         fld     st(0)           ; newy
  277         ftst                    ; check to see if newy == 0
  278         fstsw   ax
  279         sahf
  280         pop     ax              ; retrieve ax (does not affect flags)
  281         jne     short non_zero_y
  282         fcomp   st(0)           ; pop it off the stack
  283 ;        ret                     ; if y==0, return with normal ax
  284         jmp     special_outside_ret
  285 non_zero_y:
  286         fdivr   st(0),st(2)         ; newx/newy
  287         mov     tmp_dword,eax
  288         fimul   tmp_dword       ; (ax,dx)*newx/newy  (Use FIMUL instead of MUL
  289         test    bad_outside,1h
  290         jz      short over_bad_mult
  291         fsub    round_down_half ; to make it match the C code.)
  292         jmp     short over_good_mult
  293 over_bad_mult:
  294         frndint
  295 over_good_mult:
  296         fistp   tmp_dword
  297         fwait
  298         mov     eax,tmp_dword
  299         jmp     short check_color
  300 not_mult:
  301         cmp     outside,-5
  302         jne     short not_sum
  303         fld     st(1)           ; newx
  304         fadd    st(0),st(1)     ; newx+newy
  305         test    bad_outside,1h
  306         jz     short over_bad_summ
  307         fsub    round_down_half
  308         jmp     short over_good_summ
  309 over_bad_summ:
  310         frndint
  311 over_good_summ:
  312         fistp   tmp_dword
  313         fwait
  314         add     eax,tmp_dword
  315         jmp     short check_color
  316 not_sum:
  317         cmp     outside,-6      ; currently always equal, but put here
  318         jne     short not_atan        ; for future outside types
  319         fld     st(0)           ; newy
  320         fld     st(2)           ; newx newy
  321         fpatan                  ; arctan(y/x)
  322         fimul   atan_colors     ; atan_colors*atan
  323         fldpi                   ; pi atan_colors*atan
  324         fdiv                    ; atan_colors*atan/pi
  325         fabs
  326         frndint
  327         fistp   tmp_dword
  328         fwait
  329         mov     eax,tmp_dword
  330 
  331 not_atan:
  332 check_color:
  333         cmp     eax,maxit               ; use UNSIGNED comparison
  334         jbe     short check_release     ; color < 0 || color > maxit
  335         sub     eax,eax                 ; eax = 0
  336 check_release:
  337         cmp     save_release,1961
  338         jb      short special_outside_ret
  339         cmp     eax,0
  340         jne     special_outside_ret
  341         mov     eax,1                   ; eax = 1
  342 special_outside_ret:
  343         pop   Control
  344         fldcw Control              ; Restore control word
  345         ret
  346 special_outside_p5 ENDP
  347 
  348 calmanp5_text  ENDS
  349 
  350 END
  351 
  352