File: dos\calcmand.asm
1 ; CALCMAND.ASM - Mandelbrot/Julia Set calculation Routines
2
3 ; This module runs as part of an overlay with calcfrac.c.
4 ; It must not be called from anywhere other than calcfrac.
5
6 ; The routines in this code perform Mandelbrot and Julia set
7 ; calculations using 32-bit integer math as opposed to the
8 ; "traditional" floating-point approach.
9
10 ; This code relies on several tricks to run as quickly as it does.
11
12 ; One can fake floating point arithmetic by using integer
13 ; arithmetic and keeping track of the implied decimal point
14 ; if things are reasonable -- and in this case, they are.
15 ; I replaced code that looked like: z = x*y with code that
16 ; looks like:
17 ; ix = x * ifudge (outside the loops)
18 ; iy = y * ifudge
19 ; ....
20 ; iz = (ix * iy) / ifudge (inside the loops)
21 ; (and keep remembering that all the integers are "ifudged" bigger)
22
23 ; The 386 has native 32-bit integer arithmetic, and (briefly) keeps
24 ; 64-bit values around after 32-bit multiplies. If the result is
25 ; divided down right away, you've got 64-bit arithmetic. You just
; have to ensure that the result after the divide is <= 32 bits long.
; CPUs predating the 386 have to emulate 32-bit arithmetic using
; 16-bit arithmetic, which is significantly slower.
; Dividing is slow -- but shifting is fast, and we can select our
; "fudge factor" to be a power of two, permitting us to use that
; method instead. In addition, the 386 can perform 32-bit wide
; shifting -- and even 64-bit shifts with the following logic:
; shdr eax,edx,cl
; shr edx,cl
; so we make sure that our "fudge factor" is a power of 2 and shift
; it down that way.
; Calcmand is hardcoded for a fudge factor of 2**29.
; Bert Tyler
; History since Fractint 16.0
; (See comments with CJLT in them)
; CJLT=Chris Lusby Taylor who has...
;
; 1. Speeded up 16 bit on 16 bit CPU
; Minor changes, notably prescaling to fg14 before multiplying
; instead of scaling the answer.
; Also, I added overflow detection after adding linit, since it
; seems this could overflow.
; Overall effect is about 10 97538516110730176000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.000000aster on 386 with debugflag=8088
; 2. Speeded up 32 bit on 16 bit CPU
; The macro `square' is totally rewritten, as is the logic for 2xy,
26 ; by prescaling x and y to fg31, not fg29. This allows us to do a
27 ; 32 bit multiply in 3, not 4, 16 bit chunks while retaining full
28 ; fg29 accuracy.
29 ; Also, I removed lots of well-meaning but ineffective code handling
30 ; special cases of zeros and tidied up the handling of negative numbers,
31 ; so the routine is quite a bit shorter now and overall throughput of
32 ; Mandel is now over 40 faster on a 386 with debugflag=8088.
33 ; By the way, I was tempted to go the whole hog and replace x*x-y*y
34 ; by (x+y)*(x-y) to reduce 4 16-bit multiplys to 3, but it makes
35 ; escape detection a bit trickier. Another time, maybe.
36 ;
37 ; 3. Made maxit a dword variable. 1/18/94
38
39 ; required for compatibility if Turbo ASM
40 IFDEF ??version
41 MASM51
42 QUIRKS
43 ENDIF
44
45 .MODEL medium,c
46 DGROUP group _DATA,_DATA2
47
48 .8086
49
50 ; these must NOT be in any segment!!
51 ; this get's rid of TURBO-C fixup errors
extrn keypressed:far ; this routine is in 'general.asm'
extrn getakey:far ; this routine is in 'general.asm'
extrn iplot_orbit:far ; this routine is in 'calcfrac.c'
extrn scrub_orbit:far ; this routine is in 'calcfrac.c'
_DATA2 segment DWORD PUBLIC 'DATA'
FUDGEFACTOR equ 29 ; default (non-potential) fudgefactor
KEYPRESSDELAY equ 32767 ; 7FFFh
; ************************ External variables *****************************
extrn fractype:word ; == 0 if Mandelbrot set, else Julia
extrn inside:word ; "inside" color, normally 1 (blue)
extrn outside:word ; "outside" color, normally -1 (iter)
extrn creal:dword, cimag:dword ; Julia Set Constant
extrn delmin:dword ; min increment - precision required
extrn maxit:dword ; maximum iterations
extrn lm:dword ; magnitude bailout limit
extrn coloriter:dword ; iterations calculated for the pixel
extrn realcoloriter:dword
extrn row:word, col:word ; current pixel to calc
extrn reset_periodicity:word ; nonzero if to be reset
extrn kbdcount:word ; keyboard counter
extrn cpu:word ; cpu type: 86, 186, 286, or 386
extrn dotmode:word
extrn show_orbit:word ; "show-orbit" flag
extrn orbit_ptr:word ; "orbit pointer" flag
extrn periodicitycheck:word ; no periodicity if zero
extrn lclosenuff:dword
public linitx,linity ; caller sets these
extrn nextsavedincr:word ; for incrementing AND value
extrn firstsavedand:dword ; AND value
extrn showdot:word
extrn orbit_delay:word
; ************************ Internal variables *****************************
align 4
x dd 0 ; temp value: x
y dd 0 ; temp value: y
;absx dd 0 ; temp value: abs(x)
linitx dd 0 ; initial value, set by calcfrac
linity dd 0 ; initial value, set by calcfrac
savedx dd 0 ; saved values of X and Y iterations
savedy dd 0 ; (for periodicity checks)
k dd 0 ; iteration countdown counter
oldcoloriter dd 0 ; prior pixel's escape time k value
52 savedand dd 0 ; AND value for periodicity checks
53 savedincr dw 0 ; flag for incrementing AND value
54 period db 0 ; periodicity, if in the lake
55
56 _DATA2 ends
57
58 .CODE
59
60 ; ***************** Function calcmandasm() **********************************
61
62 public calcmandasm
63
64 FRAME MACRO regs
65 push bp
66 mov bp, sp
67 IRP reg,
68 push reg
69 ENDM
70 ENDM
71
72 UNFRAME MACRO regs
73 IRP reg,
74 pop reg
75 ENDM
76 pop bp
77 ENDM
78
79 calcmandasm proc
80 FRAME ; std frame, for TC++ overlays
81 sub ax,ax ; clear ax
82 mov dx,ax ; clear dx
83 cmp periodicitycheck,ax ; periodicity checking disabled?
84 je initoldcolor ; yup, set oldcolor 0 to disable it
85 cmp reset_periodicity,ax ; periodicity reset?
86 je short initparms ; inherit oldcolor from prior invocation
87 mov ax,word ptr maxit ; yup. reset oldcolor to maxit-250
88 mov dx,word ptr maxit+2
89 sub ax,250 ; (avoids slowness at high maxits)
90 sbb dx,0
91
92 initoldcolor:
93 mov word ptr oldcoloriter,ax ; reset oldcoloriter
94 mov word ptr oldcoloriter+2,dx ; reset oldcoloriter
95
96 initparms:
97 mov ax,word ptr creal ; initialize x == creal
98 mov dx,word ptr creal+2 ; ...
99 mov word ptr x,ax ; ...
100 mov word ptr x+2,dx ; ...
101
102 mov ax,word ptr cimag ; initialize y == cimag
103 mov dx,word ptr cimag+2 ; ...
104 mov word ptr y,ax ; ...
105 mov word ptr y+2,dx ; ...
106
107 mov ax,word ptr maxit ; setup k = maxit
108 mov dx,word ptr maxit+2
109 add ax,1 ; (+ 1)
110 adc dx,0
111 mov word ptr k,ax ; (decrementing to 0 is faster)
112 mov word ptr k+2,dx
113
114 cmp fractype,1 ; julia or mandelbrot set?
115 je short dojulia ; julia set - go there
116
117 ; (Tim wants this code changed so that, for the Mandelbrot,
118 ; Z(1) = (x + iy) + (a + ib). Affects only "fudged" Mandelbrots.
119 ; (for the "normal" case, a = b = 0, and this works, too)
120 ; cmp word ptr x,0 ; Mandelbrot shortcut:
121 ; jne short doeither ; if creal = cimag = 0,
122 ; cmp word ptr x+2,0 ; the first iteration can be emulated.
123 ; jne short doeither ; ...
124 ; cmp word ptr y,0 ; ...
125 ; jne short doeither ; ...
126 ; cmp word ptr y+2,0 ; ...
127 ; jne short doeither ; ...
128 ; dec k ; we know the first iteration passed
129 ; mov dx,word ptr linitx+2 ; copy x = linitx
130 ; mov ax,word ptr linitx ; ...
131 ; mov word ptr x+2,dx ; ...
132 ; mov word ptr x,ax ; ...
133 ; mov dx,word ptr linity+2 ; copy y = linity
134 ; mov ax,word ptr linity ; ...
135 ; mov word ptr y+2,dx ; ...
136 ; mov word ptr y,ax ; ...
137
138 sub word ptr k,1 ; we know the first iteration passed
139 sbb word ptr k+2,0 ; we know the first iteration passed
140 mov dx,word ptr linitx+2 ; add x += linitx
141 mov ax,word ptr linitx ; ...
142 add word ptr x,ax ; ...
143 adc word ptr x+2,dx ; ...
144 mov dx,word ptr linity+2 ; add y += linity
145 mov ax,word ptr linity ; ...
146 add word ptr y,ax ; ...
147 adc word ptr y+2,dx ; ...
148 jmp short doeither ; branch around the julia switch
149
150 dojulia: ; Julia Set initialization
151 ; "fudge" Mandelbrot start-up values
152 mov ax,word ptr x ; switch x with linitx
153 mov dx,word ptr x+2 ; ...
154 mov bx,word ptr linitx ; ...
155 mov cx,word ptr linitx+2 ; ...
156 mov word ptr x,bx ; ...
157 mov word ptr x+2,cx ; ...
158 mov word ptr linitx,ax ; ...
159 mov word ptr linitx+2,dx ; ...
160
161 mov ax,word ptr y ; switch y with linity
162 mov dx,word ptr y+2 ; ...
163 mov bx,word ptr linity ; ...
164 mov cx,word ptr linity+2 ; ...
165 mov word ptr y,bx ; ...
166 mov word ptr y+2,cx ; ...
167 mov word ptr linity,ax ; ...
168 mov word ptr linity+2,dx ; ...
169
170 doeither: ; common Mandelbrot, Julia set code
171 mov period,0 ; claim periodicity of 1
172 mov ax,word ptr firstsavedand ; initial periodicity check
173 mov word ptr savedand,ax ; initial periodicity check
174 mov ax,word ptr firstsavedand+2 ; initial periodicity check
175 mov word ptr savedand+2,ax ; initial periodicity check
176 mov savedincr,1 ; flag for incrementing periodicity
177 mov word ptr savedx+2,0ffffh; impossible value of "old" x
178 mov word ptr savedy+2,0ffffh; impossible value of "old" y
179 mov orbit_ptr,0 ; clear orbits
180
181 dec kbdcount ; decrement the keyboard counter
182 jns nokey ; skip keyboard test if still positive
183 mov kbdcount,10 ; stuff in a low kbd count
184 cmp show_orbit,0 ; are we showing orbits?
185 jne quickkbd ; yup. leave it that way.
186 cmp orbit_delay,0 ; are we delaying orbits?
187 je slowkbd ; nope. change it.
188 cmp showdot,0 ; are we showing the current pixel?
189 jge quickkbd ; yup. leave it that way.
190 slowkbd:
191 mov kbdcount,5000 ; else, stuff an appropriate count val
192 cmp cpu,386 ; ("appropriate" to the CPU)
193 jae short kbddiskadj ; ...
194 ;; cmp word ptr delmin+2,1 ; is 16-bit math good enough?
195 cmp word ptr delmin+2,8 ; is 16-bit math good enough?
196 ja kbddiskadj ; yes. test less often
197 mov kbdcount,500 ; no. test more often
198 kbddiskadj:
199 cmp dotmode,11 ; disk video?
200 jne quickkbd ; no, leave as is
201 shr kbdcount,1 ; yes, reduce count
202 shr kbdcount,1 ; yes, reduce count
203 quickkbd:
204 call far ptr keypressed ; has a key been pressed?
205 cmp ax,0 ; ...
206 je nokey ; nope. proceed
207 mov kbdcount,0 ; make sure it goes negative again
208 cmp ax,'o' ; orbit toggle hit?
209 je orbitkey ; yup. show orbits
210 cmp ax,'O' ; orbit toggle hit?
211 jne keyhit ; nope. normal key.
212 orbitkey:
213 call far ptr getakey ; read the key for real
214 mov ax,1 ; reset orbittoggle = 1 - orbittoggle
215 sub ax,show_orbit ; ...
216 mov show_orbit,ax ; ...
217 jmp short nokey ; pretend no key was hit
218 keyhit: mov ax,-1 ; return with -1
219 mov dx,ax
220 mov word ptr coloriter,ax ; set coloriter to -1
221 mov word ptr coloriter+2,dx
222 UNFRAME ; pop stack frame
223 ret ; bail out!
224
225 nokey:
226 cmp show_orbit,0 ; is orbiting on?
227 jne no16bitcode ; yup. slow down.
228 cmp cpu,386 ; are we on a 386?
229 jae short code386bit ; YAY!! 386-class speed!
230 ;; cmp word ptr delmin+2,1 ; OK, we're desperate. 16 bits OK?
cmp word ptr delmin+2,8 ; OK, we're desperate. 16 bits OK?
231 ja yes16bitcode ; YAY! 16-bit speed!
232 no16bitcode:
233 call near ptr code32bit ; BOO!! nap time. Full 32 bit math
234 cmp cx,-1
235 je keyhit ; key stroke, get us out of here
236 jmp kloopend ; bypass the 386-specific code.
237 yes16bitcode:
238 call near ptr code16bit ; invoke the 16-bit version
239 cmp cx,-1
240 je keyhit ; key stroke, get us out of here
241 jmp kloopend ; bypass the 386-specific code.
242
243 .386 ; 386-specific code starts here
244
245 code386bit:
246 ;; cmp word ptr delmin+2,3 ; is 16-bit math good enough?
247 cmp word ptr delmin+2,8 ; is 16-bit math good enough?
248 jbe code386_32 ; nope, go do 32 bit stuff
249 IFDEF ??version
250 jmp code386_32 ; TASM screws up IMUL EBX,EBX!!
251 ENDIF
252
253 ; 16 bit on 386, now we are really gonna move
254 movsx esi,word ptr x+2 ; use SI for X
255 movsx edi,word ptr y+2 ; use DI for Y
256 push ebp
257 mov ebp,-1
258 shl ebp,FUDGEFACTOR-1
259 mov cx,FUDGEFACTOR-16
260
261 kloop386_16: ; cx=bitshift-16, ebp=overflow.mask
262
263 mov ebx,esi ; compute (x * x)
264 imul ebx,ebx ; ...
265 test ebx,ebp ;
266 jnz short end386_16 ; (oops. We done.)
267 shr ebx,cl ; get result down to 16 bits
268
269 mov edx,edi ; compute (y * y)
270 imul edx,edx ; ...
271 test edx,ebp ; say, did we overflow?
272 jnz short end386_16 ; (oops. We done.)
273 shr edx,cl ; get result down to 16 bits
274
275 mov ax,bx ; compute (x*x - y*y) / fudge
276 sub bx,dx ; for the next iteration
277
278 add ax,dx ; compute (x*x + y*y) / fudge
279
280 cmp ax,word ptr lm+2 ; while (xx+yy < lm)
281 jae short end386_16 ; ...
282
283 imul edi,esi ; compute (y * x)
284 shl edi,1 ; ( * 2 / fudge)
285 sar edi,cl
286 add di,word ptr linity+2 ; (2*y*x) / fudge + linity
287 movsx edi,di ; save as y
288
289 add bx,word ptr linitx+2 ; (from above) (x*x - y*y)/fudge + linitx
290 movsx esi,bx ; save as x
291
292 ; mov eax,oldcoloriter ; recall the old color
293 ; cmp eax,k ; check it against this iter
294 ; jge short chkpd386_16 ; yup. do periodicity check.
295 mov eax,k ; rearranged for speed
296 cmp eax,oldcoloriter
297 jb short chkpd386_16
298 nonmax386_16:
299 ; miraculously, k is always loaded into eax at this point
300 ; mov eax,k ; set up to test for key stroke
301 test eax,KEYPRESSDELAY
302 jne notakey1 ; don't test yet
push cx
call far ptr keypressed ; has a key been pressed?
pop cx
cmp ax,0 ; ...
je notakey1 ; nope. proceed
pop ebp
jmp keyhit
notakey1:
dec k ; while (k < maxit)
jnz short kloop386_16 ; try, try again
end386_16:
pop ebp
jmp kloopend32 ; we done
chkpd386_16:
; mov eax,k ; set up to test for save-time ;; already loaded
test eax,savedand ; save on 0, check on anything else
jz short chksv386_16 ; time to save a new "old" value
mov bx,si ; load up x
xor bx,word ptr savedx+2 ; does X match?
cmp bx,word ptr lclosenuff+2 ; truncate to appropriate precision
ja short nonmax386_16 ; nope. forget it.
mov bx,di ; now test y
xor bx,word ptr savedy+2 ; does Y match?
cmp bx,word ptr lclosenuff+2 ; truncate to appropriate precision
ja short nonmax386_16 ; nope. forget it.
mov period,1 ; note that we have found periodicity
mov k,0 ; pretend maxit reached
jmp short end386_16
chksv386_16:
mov word ptr savedx+2,si ; save x
mov word ptr savedy+2,di ; save y
dec savedincr ; time to change the periodicity?
jnz short nonmax386_16 ; nope.
shl savedand,1 ; well then, let's try this one!
303 inc savedand ; (2**n +1)
304 mov ax,nextsavedincr ; and reset the increment flag
305 mov savedincr,ax ; and reset the increment flag
306 jmp short nonmax386_16
307
308 ; 32bit on 386:
309 code386_32:
310 mov esi,x ; use ESI for X
311 mov edi,y ; use EDI for Y
312
313 ; This is the main processing loop. Here, every T-state counts...
314
315 kloop: ; for (k = 0; k <= maxit; k++)
316
317 mov eax,esi ; compute (x * x)
318 imul esi ; ...
319 shrd eax,edx,FUDGEFACTOR ; ( / fudge)
320 shr edx,FUDGEFACTOR-1 ; (complete 64-bit shift and check
321 jne short kloopend1 ; bail out if too high
322 mov ebx,eax ; save this for below
323
324 mov eax,edi ; compute (y * y)
325 imul edi ; ...
326 shrd eax,edx,FUDGEFACTOR ; ( / fudge)
327 shr edx,FUDGEFACTOR-1 ; (complete 64-bit shift and check
328 jne short kloopend1 ; bail out if too high
329
330 mov ecx,ebx ; compute (x*x - y*y) / fudge
331 sub ebx,eax ; for the next iteration
332
333 add ecx,eax ; compute (x*x + y*y) / fudge
334 cmp ecx,lm ; while (lr < lm)
335 jae short kloopend1 ; ...
336
337 mov eax,edi ; compute (y * x)
338 imul esi ; ...
339 shrd eax,edx,FUDGEFACTOR-1 ; ( * 2 / fudge)
340 add eax,linity ; (above) + linity
341 mov edi,eax ; save this as y
342
343 ; (from the earlier code) ; compute (x*x - y*y) / fudge
344 add ebx,linitx ; + linitx
345 mov esi,ebx ; save this as x
346
347 ; mov eax,oldcoloriter ; recall the old coloriter
348 ; cmp eax,k ; check it against this iter
349 ; jge short chkperiod1
350 mov eax,k ; rearranged for speed
351 cmp eax,oldcoloriter
352 jb short chkperiod1
353 nonmax1:
354 mov eax,k
355 test eax,KEYPRESSDELAY
356 jne notakey2 ; don't test yet
call far ptr keypressed ; has a key been pressed?
cmp ax,0 ; ...
je notakey2 ; nope. proceed
jmp keyhit
notakey2:
dec k ; while (k < maxit) (dec to 0 is faster)
jnz kloop ; while (k < maxit) ...
kloopend1:
jmp short kloopend32 ; we done.
chkperiod1:
; mov eax,k ; already done
test eax,savedand
jz short chksave1
mov eax,esi
xor eax,savedx
cmp eax,lclosenuff
ja short nonmax1
mov eax,edi
xor eax,savedy
cmp eax,lclosenuff
ja short nonmax1
mov period,1 ; note that we have found periodicity
mov k,0 ; pretend maxit reached
jmp short kloopend32 ; we done.
chksave1:
mov eax,k
mov savedx,esi
mov savedy,edi
dec savedincr ; time to change the periodicity?
jnz short nonmax1 ; nope.
shl savedand,1 ; well then, let's try this one!
357 inc savedand ; (2**n +1)
358 mov ax,nextsavedincr ; and reset the increment flag
359 mov savedincr,ax ; and reset the increment flag
360 jmp short nonmax1
361
362 kloopend32:
363
364 cmp orbit_ptr,0 ; any orbits to clear?
365 je noorbit32 ; nope.
366 call far ptr scrub_orbit ; clear out any old orbits
367 noorbit32:
368
369 mov eax, k ; set old color
370 sub eax,10 ; minus 10, for safety
371 mov oldcoloriter,eax ; and save it as the "old" color
372 mov eax,maxit ; compute color
373 sub eax,k ; (first, re-compute "k")
374 sub kbdcount,ax ; adjust the keyboard count (use ax only)
375 cmp eax,0 ; convert any "outlier" region
376 jg short coloradjust1_32 ; (where abs(x) > 2 or abs(y) > 2)
377 mov eax,1 ; to look like we ran through
378 coloradjust1_32: ; at least one loop.
379 mov realcoloriter,eax ; result before adjustments
380 cmp eax,maxit ; did we max out on iterations?
381 jne short notmax32 ; nope.
382 mov oldcoloriter,eax ; set "oldcolor" to maximum
383 cmp inside,0 ; is "inside" >= 0?
384 jl wedone32 ; nope. leave it at "maxit"
385 sub eax,eax
386 mov ax,inside ; reset max-out color to default
387 cmp periodicitycheck,0 ; show periodicity matches?
388 jge wedone32 ; nope.
389 ; mov al,period ; reset color to periodicity flag
390 cmp period,0
391 je wedone32
392 mov ax,7 ; use color 7 (default white)
393 jmp short wedone32
394
395 notmax32:
396 cmp outside,0 ; is "outside" >= 0?
397 jl wedone32 ; nope. leave as realcolor
398 sub eax,eax
399 mov ax, outside ; reset to "outside" color
400
401 wedone32: ;
402 mov coloriter,eax ; save the color result
403 shld edx,eax,16 ; put result in ax,dx
404 shr eax,16
405 UNFRAME ; pop stack frame
406 ret ; and return with color
407
408
409 .8086 ; 386-specific code ends here
410
411 kloopend:
412 cmp orbit_ptr,0 ; any orbits to clear?
413 je noorbit2 ; nope.
414 call far ptr scrub_orbit ; clear out any old orbits
415 noorbit2:
416
417 mov ax,word ptr k ; set old color
418 mov dx,word ptr k+2 ; set old color
419 sub ax,10 ; minus 10, for safety
420 sbb dx,0
421 mov word ptr oldcoloriter,ax ; and save it as the "old" color
422 mov word ptr oldcoloriter+2,dx ; and save it as the "old" color
423 mov ax,word ptr maxit ; compute color
424 mov dx,word ptr maxit+2 ; compute color
425 sub ax,word ptr k ; (first, re-compute "k")
426 sbb dx,word ptr k+2 ; (first, re-compute "k")
427 sub kbdcount,ax ; adjust the keyboard count
428 cmp dx,0 ; convert any "outlier" region
429 js short kludge_for_julia ; k can be > maxit!!!
430 ja short coloradjust1 ; (where abs(x) > 2 or abs(y) > 2)
431 cmp ax,0
432 ja short coloradjust1 ; (where abs(x) > 2 or abs(y) > 2)
433 kludge_for_julia:
434 mov ax,1 ; to look like we ran through
435 sub dx,dx
436 coloradjust1: ; at least one loop.
437 mov word ptr realcoloriter,ax ; result before adjustments
438 mov word ptr realcoloriter+2,dx ; result before adjustments
439 cmp dx,word ptr maxit+2 ; did we max out on iterations?
440 jne short notmax ; nope.
441 cmp ax,word ptr maxit ; did we max out on iterations?
442 jne short notmax ; nope.
443 mov word ptr oldcoloriter,ax ; set "oldcolor" to maximum
444 mov word ptr oldcoloriter+2,dx ; set "oldcolor" to maximum
445 cmp inside,0 ; is "inside" >= 0?
446 jl wedone ; nope. leave it at "maxit"
447 mov ax,inside ; reset max-out color to default
448 sub dx,dx
449 cmp periodicitycheck,0 ; show periodicity matches?
450 jge wedone ; nope.
451 ; sub ax,ax ; clear top half for next
452 ; mov al,period ; reset color to periodicity flag
453 ; cmp period,0
454 ; jz wedone
455 mov ax,7 ; use color 7 (default white)
456 jmp short wedone
457
458 notmax:
459 cmp outside,0 ; is "outside" >= 0?
460 jl wedone ; nope. leave as realcolor
461 mov ax, outside ; reset to "outside" color
462 sub dx,dx
463
464 wedone: ;
465 mov word ptr coloriter,ax ; save the color result
466 mov word ptr coloriter+2,dx ; save the color result
467 UNFRAME ; pop stack frame
468 ret ; and return with color
469
470 calcmandasm endp
471
472
473 ; ******************** Function code16bit() *****************************
474 ;
475 ; Performs "short-cut" 16-bit math where we can get away with it.
476 ; CJLT has modified it, mostly by preshifting x and y to fg30 from fg29
477 ; or, since we ignore the lower 16 bits, fg14 from fg13.
478 ; If this shift overflows we are outside x*x+y*y=2, so have escaped.
479 ; Also, he commented out several conditional jumps which he calculated could
480 ; never be taken (e.g. mov ax,si / imul si ;cannot overflow).
481
482 code16bit proc near
483
484 mov si,word ptr x+2 ; use SI for X fg13
485 mov di,word ptr y+2 ; use DI for Y fg13
486
487 start16bit:
488 add si,si ;CJLT-Convert to fg14
489 ; jo end16bit ;overflows if <-2 or >2
490 jno not_end16bit1
491 jmp end16bit ;overflows if <-2 or >2
492 not_end16bit1:
493 mov ax,si ; compute (x * x)
494 imul si ; Answer is fg14+14-16=fg12
495 ; cmp dx,0 ;CJLT commented out-
496 ; jl end16bit ;CJLT- imul CANNOT overflow
497 ; mov cx,32-FUDGEFACTOR ;CJLT. FUDGEFACTOR=29 is hard coded
498 loop16bit1:
499 shl ax,1 ; ...
500 rcl dx,1 ; ...
501 ; jo end16bit ; (oops. overflow)
502 jno not_end16bit2
503 jmp end16bit ; (oops. overflow)
504 not_end16bit2:
505 ; loop loop16bit1 ;CJLT...do it once only. dx now fg13.
506 mov bx,dx ; save this for a tad
507
508 ;ditto for y*y...
509
510 add di,di ;CJLT-Convert to fg14
511 ; jo end16bit ;overflows if <-2 or >2
512 jno not_end16bit3
513 jmp end16bit ;overflows if <-2 or >2
514 not_end16bit3:
515 mov ax,di ; compute (y * y)
516 imul di ; ...
517 ; cmp dx,0 ; say, did we overflow?
518 ; jl end16bit ; (oops. We done.)
519 ; mov cx,32-FUDGEFACTOR ; ( / fudge)
520 ;loop16bit2:
521 shl ax,1 ; ...
522 rcl dx,1 ; ...
523 jo end16bit ; (oops. overflow)
524 ; loop loop16bit2 ; ...
525
526 mov cx,bx ; compute (x*x - y*y) / fudge
527 sub bx,dx ; for the next iteration
528
529 add cx,dx ; compute (x*x + y*y) / fudge
530 jo end16bit ; bail out if too high
531 ; js end16bit ; ...
532
533 cmp cx,word ptr lm+2 ; while (xx+yy < lm)
534 jae end16bit ; ...
535 sub word ptr k,1 ; while (k < maxit)
536 sbb word ptr k+2,0
537 jnz notdoneyet
538 cmp word ptr k,0
539 jz end16bit ; we done.
540 notdoneyet:
541 mov ax,di ; compute (y * x) fg14+14=fg28
542 imul si ; ...
543 ; mov cx,33-FUDGEFACTOR-2 ; ( * 2 / fudge)
544 ;loop16bit3:
545 shl ax,1 ; ...
546 rcl dx,1 ; ...
547 shl ax,1 ; shift two bits
548 rcl dx,1 ; cannot overflow as |x|<=2, |y|<=2
549 ; loop loop16bit3 ; ...
550 add dx,word ptr linity+2 ; (2*y*x) / fudge + linity
551 jo end16bit ; bail out if too high
552 mov di,dx ; save as y
553
554 add bx,word ptr linitx+2 ; (from above) (x*x - y*y)/fudge + linitx
555 jo end16bit ; bail out if too high
556 mov si,bx ; save as x
557
558 mov dx,word ptr oldcoloriter+2 ; recall the old color
559 cmp dx,word ptr k+2 ; check it against this iter
560 jb short nonmax3 ; nope. bypass periodicity check.
561 mov ax,word ptr oldcoloriter ; recall the old color
562 cmp ax,word ptr k ; check it against this iter
563 jb short nonmax3 ; nope. bypass periodicity check.
564 mov word ptr x+2,si ; save x for periodicity check
565 mov word ptr y+2,di ; save y for periodicity check
566 call checkperiod ; check for periodicity
567 nonmax3:
568 mov ax,word ptr k ; set up to test for key stroke
569 test ax,KEYPRESSDELAY
570 jne notakey3 ; don't test yet
push cx
push bx
call far ptr keypressed ; has a key been pressed?
pop bx
pop cx
cmp ax,0 ; ...
je notakey3 ; nope. proceed
mov cx,-1
jmp short end16bitgotkey ; cx set, jump to end
notakey3:
jmp start16bit ; try, try again.
end16bit: ; we done.
xor cx,cx ; no key so zero cx
end16bitgotkey: ; jump here if key
ret
code16bit endp
; The following routine checks for periodic loops (a known
; method of decay inside "Mandelbrot Lake", and an easy way to
; bail out of "lake" points quickly). For speed, only the
; high-order sixteen bits of X and Y are checked for periodicity.
; For accuracy, this routine is only fired up if the previous pixel
; was in the lake (which means that the FIRST "wet" pixel was
; detected by the dull-normal maximum iteration process).
checkperiod proc near ; periodicity check
mov ax,word ptr k ; set up to test for save-time
test ax,word ptr savedand ; save on 0, check on anything else
jnz notimeyet ; NOT time to save a new "old" value
mov dx,word ptr k+2 ; set up to test for save-time
test dx,word ptr savedand+2 ; save on 0, check on anything else
jz checksave ; time to save a new "old" value
notimeyet:
mov dx,word ptr x+2 ; load up x
xor dx,word ptr savedx+2
cmp dx,word ptr lclosenuff+2
ja checkdone
mov ax,word ptr x ; load up x
xor ax,word ptr savedx
cmp ax,word ptr lclosenuff
ja checkdone
mov dx,word ptr y+2 ; load up y
xor dx,word ptr savedy+2
cmp dx,word ptr lclosenuff+2
ja checkdone
mov ax,word ptr y ; load up y
xor ax,word ptr savedy
cmp ax,word ptr lclosenuff
ja checkdone
mov period,1 ; note that we have found periodicity
mov word ptr k,1 ; pretend maxit reached
mov word ptr k+2,0 ; pretend maxit reached
checksave:
mov dx,word ptr x+2 ; load up x
mov word ptr savedx+2,dx ; and save it
mov ax,word ptr x ; load up x
mov word ptr savedx,ax ; and save it
mov dx,word ptr y+2 ; load up y
mov word ptr savedy+2,dx ; and save it
mov ax,word ptr y ; load up y
mov word ptr savedy,ax ; and save it
dec savedincr ; time to change the periodicity?
jnz checkdone ; nope.
shl word ptr savedand,1 ; well then, let's try this one!
571 rcl word ptr savedand+2,1 ; well then, let's try this one!
add word ptr savedand,1 ; (2**n +1)
adc word ptr savedand+2,0 ; (2**n +1)
mov ax,nextsavedincr ; and reset the increment flag
mov savedincr,ax ; and reset the increment flag
checkdone:
ret ; we done.
checkperiod endp
; ******************** Function code32bit() *****************************
;
; Perform the 32-bit logic required using 16-bit logic
;
; New twice as fast logic,
; Courtesy of Bill Townsend and Mike Gelvin (CIS:73337,520)
; Even newer, faster still by Chris Lusby Taylor
; who noted that we needn't square the low word if we first multiply
572 ; by 4, since we only need 29 places after the point and this will
573 ; give 30. (We divide answer by two to give 29 bit shift in answer)
574 ; Also, he removed all testing for special cases where a word of operand
575 ; happens to be 0, since testing 65536 times costs more than the saving
576 ; 1 time out of 65536! (I benchmarked it. Just removing the tests speeds
577 ; us up by 3.)
578 ;
579 ;Note that square returns DI,AX squared in DX,AX now.
580 ; DI,AX is first converted to unsigned fg31 form.
581 ; (For its square to be representable in fg29 (range -4..+3.999)
582 ; DI:AX must be in the range 0..+1.999 which fits neatly into unsigned fg31.)
583 ; This allows us to ignore the part of the answer corresponding to AX*AX as it
584 ; is less than half a least significant bit of the final answer.
585 ; I thought you'd like that.
;
; As we prescaled DI:AX, we need to shift the answer 1 bit to the right to
; end up in fg29 form since 29=(29+2)+(29+2)-32-1
; However, the mid term AX*DI is needed twice, so the shifts cancel.
;
; Since abs(x) and abs(y) in fg31 form will be needed in calculating 2*X*Y
; we push them onto the stack for later use.
; Note that square does nor affect bl,si,bp
; and leaves highword of argument in di
; but destroys bh,cx
square MACRO donepops
LOCAL notneg
shl ax,1 ;Multiply by 2 to convert to fg30
rcl di,1 ;If this overflows DI:AX was negative
jnc notneg
not ax ; so negate it
not di ; ...
add ax,1 ; ...
adc di,0 ; ...
not bl ; change negswt
notneg: shl ax,1 ;Multiply by 2 again to give fg31
rcl di,1 ;If this gives a carry then DI:AX was >=2.0
;If its high bit is set then DI:AX was >=1.0
;This is OK, but note that this means that
;DI:AX must now be treated as unsigned.
jc donepops
push di ; save y or x (in fg31 form) on stack
push ax ; ...
mul di ;GET MIDDLE PART - 2*A*B
mov bh,ah ;Miraculously, it needs no shifting!
mov cx,dx
mov ax,di
mul ax ;SQUARE HIGH HWORD - A*A
shl bh,1 ;See if we round up
adc ax,1 ;Anyway, add 1 to round up/down accurately
adc dx,0
shr dx,1 ;This needs shifting one bit
rcr ax,1
add ax,cx ;Add in the 2*A*B term
adc dx,0
ENDM ;#EM
code32bit proc near
;
; BL IS USED FOR THE "NEGSWT" FLAG
; NEGSWT IS ONE IF EITHER "X" OR "Y" ARE NEGATIVE, BUT NOT BOTH NEGATIVE
;
push bp
xor bl,bl ; NEGSWT STARTS OFF ZERO
; iteration loop
nextit: mov ax,word ptr y ; ax=low(y)
mov di,word ptr y+2 ; di=high(y)
square done1 ;square y and quit via done1 if it overflows
mov si,ax ; square returns results in dx,ax
mov bp,dx ; save y*y in bp,si
mov ax,word ptr x
mov di,word ptr x+2
square done2 ; square x and quit via done2 if it overflows
mov cx,ax ; Save low answer in cx.
ADD ax,si ; calc y*y + x*x
mov ax,bp
ADC ax,dx ; ...
jno nextxy ; overflow?
;NOTE: The original code tests against lm
;here, but as lm=4<<29 this is the same
;as testing for signed overflow
done4: add sp,4 ; discard saved value of |x| fg 31
done2: add sp,4 ; discard saved value of |y| fg 31
done1: xor cx,cx ; no key exit, zero cx
done0: ; exit here if key hit
pop bp ; restore saved bp
ret
;---------------------------------------------------------------------------
nextxy: sub word ptr k,1 ; while (k < maxit)
sbb word ptr k+2,0
jnz tryagain
cmp word ptr k,0
jz done4 ; we done.
tryagain:
sub cx,si ; subtract y*y from x*x
sbb dx,bp ; ...
add cx,word ptr linitx ; add "A"
adc dx,word ptr linitx+2 ; ...
jo done4 ;CJLT-Must detect overflow here
; but increment loop count first
mov word ptr x,cx ; store new x = x*x-y*y+a
mov word ptr x+2,dx ; ...
; now calculate x*y
;
;More CJLT tricks here. We use the pushed abs(x) and abs(y) in fg31 form
;which, when multiplied, give x*y in fg30, which, luckily, is the same as...
;2*x*y fg29.
;As with squaring, we can ignore the product of the low order words, and still
;be more accurate than the original algorithm.
;
pop bp ;Xlow
pop di ;Xhigh (already there, actually)
pop ax ;Ylow
mul di ;Xhigh * Ylow
mov bh,ah ;Discard lowest 8 bits of answer
mov cx,dx
pop ax ;Yhigh
mov si,ax ; copy it
mul bp ;Xlow * Yhigh
xor bp,bp ;Clear answer
add bh,ah
adc cx,dx
adc bp,0
mov ax,si ;Yhigh
mul di ;Xhigh * Yhigh
shl bh,1 ;round up/down
adc ax,cx ;Answer-low
adc dx,bp ;Answer-high
;NOTE: The answer is 0..3.9999 in fg29
js done1 ;Overflow if high bit set
or bl,bl ; ZERO IF NONE OR BOTH X , Y NEG
jz signok ; ONE IF ONLY ONE OF X OR Y IS NEG
not ax ; negate result
not dx ; ...
add ax,1 ; ...
adc dx,0 ; ...
xor bl,bl ;Clear negswt
signok:
add ax,word ptr linity
adc dx,word ptr linity+2 ; dx,ax = 2(X*Y)+B
jo done1
mov word ptr y,ax ; save the new value of y
mov word ptr y+2,dx ; ...
mov dx,word ptr oldcoloriter+2 ; recall the old color
cmp dx,word ptr k+2 ; check it against this iter
jb short chkkey4 ; nope. bypass periodicity check.
mov ax,word ptr oldcoloriter ; recall the old color
cmp ax,word ptr k ; check it against this iter
jb short chkkey4 ; nope. bypass periodicity check.
call checkperiod ; check for periodicity
chkkey4:
mov ax,word ptr k ; set up to test for key stroke
test ax,KEYPRESSDELAY
jne notakey4 ; don't test yet
586 push cx
587 push bx
588 call far ptr keypressed ; has a key been pressed?
589 pop bx
590 pop cx
591 cmp ax,0 ; ...
592 je notakey4 ; nope. proceed
593 mov cx,-1
594 jmp done0 ; cx set, jump to very end
595 notakey4:
596
597 chkmaxit:
598 cmp show_orbit,0 ; orbiting on?
599 jne horbit ; yep.
600 jmp nextit ;go around again
601
602 horbit: push bx ; save my flags
603 mov ax,-1 ; color for plot orbit
604 push ax ; ...
605 push word ptr y+2 ; co-ordinates for plot orbit
606 push word ptr y ; ...
607 push word ptr x+2 ; ...
608 push word ptr x ; ...
609 call far ptr iplot_orbit ; display the orbit
610 add sp,5*2 ; clear out the parameters
611 pop bx ; restore flags
612 jmp nextit ; go around again
613
614 code32bit endp
615
616 end
617