File: dos\fpu087.asm
1 TITLE fpu087.asm (C) 1989, Mark C. Peterson, CompuServe [70441,3353]
2 SUBTTL All rights reserved.
3 ;
4 ; Code may be used in any program provided the author is credited
5 ; either during program execution or in the documentation. Source
6 ; code may be distributed only in combination with public domain or
7 ; shareware source code. Source code may be modified provided the
8 ; copyright notice and this message is left unchanged and all
9 ; modifications are clearly documented.
10 ;
11 ; I would appreciate a copy of any work which incorporates this code.
12 ;
13 ; Mark C. Peterson
14 ; 405-C Queen St., Suite #181
15 ; Southington, CT 06489
16 ; (203) 276-9721
17 ;
18 ; References:
19 ; The VNR Concise Encyclopedia of Mathematics
20 ; by W. Gellert, H. Hustner, M. Hellwich, and H. Kastner
21 ; Published by Van Nostrand Reinhold Comp, 1975
22 ;
23 ; 80386/80286 Assembly Language Programming
24 ; by William H. Murray, III and Chris H. Pappas
25 ; Published by Osborne McGraw-Hill, 1986
26 ;
27 ; History since Fractint 16.3:
28 ; CJLT changed e2x and Log086 algorithms for more speed
29 ; CJLT corrected SinCos086 for -ve angles in 2nd and 4th quadrants
30 ; CJLT speeded up SinCos086 for angles >45 degrees in any quadrant
31 ; (See comments containing the string `CJLT')
; 14 Aug 91 CJLT removed r16Mul - not called from anywhere
; 21 Aug 91 CJLT corrected Table[1] from 6 to b
; improved bx factors in Log086 for more accuracy
; corrected Exp086 overflow detection to 15 from 16 bits.
; 07 Sep 92 MP corrected problem in FPUcplxlog
; 07 Sep 92 MP added argument test for FPUcplxdiv
; 06 Nov 92 CAE made some varibles public for PARSERA.ASM
; 07 Dec 92 CAE sped up FPUsinhcosh function
;
; CJLT= Chris J Lusby Taylor
; 32 Turnpike Road
; Newbury, England (where's that?)
32 ; Contactable via Compuserve user Stan Chelchowski [100016,351]
33 ; or Tel 011 44 635 33270 (home)
34 ;
35 ; CAE= Chuck Ebbert CompuServe [76306,1226]
36 ;
37 ;
38 ;PROCs in this module:
39 ;FPUcplxmul PROC x:word, y:word, z:word
40 ;FPUcplxdiv PROC x:word, y:word, z:word
41 ;FPUcplxlog PROC x:word, z:word
42 ;FPUsinhcosh PROC x:word, sinh:word, cosh:word
43 ;FPUsincos PROC x:word, sinx:word, cosx:word
44 ;r16Mul PROC uses si di, x1:word, x2:word, y1:word, y2:word
45 ;RegFloat2Fg PROC x1:word, x2:word, Fudge:word
46 ;RegFg2Float PROC x1:word, x2:word, FudgeFact:byte
47 ;ExpFudged PROC uses si, x_low:word, x_high:word, Fudge:word
48 ;LogFudged PROC uses si di, x_low:word, x_high:word, Fudge:word
49 ;LogFloat14 PROC x1:word, x2:word
50 ;RegSftFloat PROC x1:word, x2:word, Shift:byte
51 ;RegDivFloat PROC uses si di, x1:word, x2:word, y1:word, y2:word
52 ;SinCos086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinAddr:WORD, \
53 ;_e2xLT PROC ;argument in dx.ax (bitshift=16 is hard coded here)
54 ;Exp086 PROC uses si di, LoNum:WORD, HiNum:WORD
55 ;SinhCosh086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinhAddr:WORD, \
56 ;Log086 PROC uses si di, LoNum:WORD, HiNum:WORD, Fudge:WORD
57
58
59 IFDEF ??version
60 MASM51
61 QUIRKS
62 ENDIF
63
64 .model medium, c
65
66 extrn cos:far
67 extrn _Loaded387sincos:far
68 extrn compiled_by_turboc:word
69
70
71 .data
72
73 extrn cpu:WORD
74 extrn overflow:word
75 ;extrn save_release:word
76 PUBLIC TrigLimit
77
78 ; CAE 6Nov92 made these public for PARSERA.ASM */
79 PUBLIC _1_, _2_, PointFive, infinity
80
81 PiFg13 dw 6487h
82 InvPiFg33 dd 0a2f9836eh
83 InvPiFg16 dw 517ch
84 Ln2Fg16 dw 0b172h ;ln(2) * 2^16 . True value is b172.18
85 Ln2Fg15 dw 058b9h ;used by e2xLT. True value is 58b9.0C
86 TrigLimit dd 0
87 ;
88 ;Table of 2^(n/16) for n=0 to 15. All entries fg15.
89 ;Used by e2xLT
90 ;
91 Table dw 08000h,085abh,08b96h,091c4h
92 dw 09838h,09ef5h,0a5ffh,0ad58h
93 dw 0b505h,0bd09h,0c567h,0ce25h
94 dw 0d745h,0e0cdh,0eac1h,0f525h
95 one dw ?
96 expSign dw ?
97 a dw ?
98 SinNeg dw ? ;CJLT - not now needed by SinCos086, but by
99 CosNeg dw ? ; ArcTan086
100 Ans dq ?
101 fake_es dw ? ; Windows can't use ES for storage
TaylorTerm MACRO
LOCAL Ratio
add Factorial, one
jnc SHORT Ratio
rcr Factorial, 1
shr Num, 1
shr one, 1
Ratio:
mul Num
div Factorial
ENDM
_4_ dq 4.0
_2_ dq 2.0
_1_ dq 1.0
PointFive dq 0.5
temp dq ?
Sign dw ?
extrn fpu:word
infinity dq 1.0E+300
.code
FPUcplxmul PROC x:word, y:word, z:word
mov bx, x
fld QWORD PTR [bx] ; x.x
fld QWORD PTR [bx+8] ; x.y, x.x
mov bx, y
fld QWORD PTR [bx] ; y.x, x.y, x.x
fld QWORD PTR [bx+8] ; y.y, y.x, x.y, x.x
mov bx, z
fld st ; y.y, y.y, y.x, x.y, x.x
fmul st, st(3) ; y.y*x.y, y.y. y.x, x.y, x.x
fld st(2) ; y.x, y.y*x.y, y.y, y.x, x.y, x.x
fmul st, st(5) ; y.x*x.x, y.y*x.y, y.y, y.x, x.y, x.x
fsubr ; y.x*x.x - y.y*x.y, y.y, y.x, x.y, x.x
fstp QWORD PTR [bx] ; y.y, y.x, x.y, x.x
fmulp st(3), st ; y.x, x.y, x.x*y.y
fmul ; y.x*x.y, x.x*y.y
fadd ; y.x*x.y + x.x*y.y
fstp QWORD PTR [bx+8]
ret
FPUcplxmul ENDP
FPUcplxdiv PROC x:word, y:word, z:word
LOCAL Status:WORD
mov bx, x
fld QWORD PTR [bx] ; x.x
fld QWORD PTR [bx+8] ; x.y, x.x
mov bx, y
fld QWORD PTR [bx] ; y.x, x.y, x.x
fld QWORD PTR [bx+8] ; y.y, y.x, x.y, x.x
fld st ; y.y, y.y, y.x, x.y, x.x
fmul st, st ; y.y*y.y, y.y, y.x, x.y, x.x
fld st(2) ; y.x, y.y*y.y, y.y, y.x, x.y, x.x
fmul st, st ; y.x*y.x, y.y*y.y, y.y, y.x, x.y, x.x
fadd ; mod, y.y, y.x, x.y, x.x
ftst ; test whether mod is (0,0)
fstsw Status
mov ax, Status
and ah, 01000101b
cmp ah, 01000000b
jne NotZero
fstp st
fstp st
fstp st
fstp st
fstp st
fld infinity
fld st
mov bx, z
fstp QWORD PTR [bx]
fstp QWORD PTR [bx+8]
; mov ax,save_release
; cmp ax,1920
; jle ExitDiv ; before 19.20 overflow wasn't set
102 mov overflow, 1
103 jmp ExitDiv
104
105 NotZero:
106 fdiv st(1), st ; mod, y.y=y.y/mod, y.x, x.y, x.x
107 fdivp st(2), st ; y.y, y.x=y.x/mod, x.y, x.x
108 mov bx, z
109 fld st ; y.y, y.y, y.x, x.y, x.x
110 fmul st, st(3) ; y.y*x.y, y.y. y.x, x.y, x.x
111 fld st(2) ; y.x, y.y*x.y, y.y, y.x, x.y, x.x
112 fmul st, st(5) ; y.x*x.x, y.y*x.y, y.y, y.x, x.y, x.x
113 fadd ; y.x*x.x - y.y*x.y, y.y, y.x, x.y, x.x
114 fstp QWORD PTR [bx] ; y.y, y.x, x.y, x.x
115 fmulp st(3), st ; y.x, x.y, x.x*y.y
116 fmul ; y.x*x.y, x.x*y.y
117 fsubr ; y.x*x.y + x.x*y.y
118 fstp QWORD PTR [bx+8]
119
120 ExitDiv:
121 ret
122 FPUcplxdiv ENDP
123
124
125
126 FPUcplxlog PROC x:word, z:word
127 LOCAL Status:word, ImagZero:WORD
128 mov bx, x
129
130 mov ax, WORD PTR [bx+8+6]
131 mov ImagZero, ax
132 or ax, WORD PTR [bx+6]
133 jnz NotBothZero
134
135 fldz
136 fldz
137 jmp StoreZX
138
139 NotBothZero:
140 fld QWORD PTR [bx+8] ; x.y
141 fld QWORD PTR [bx] ; x.x, x.y
142 mov bx, z
143 fldln2 ; ln2, x.x, x.y
144 fdiv _2_ ; ln2/2, x.x, x.y
145 fld st(2) ; x.y, ln2/2, x.x, x.y
146 fmul st, st ; sqr(x.y), ln2/2, x.x, x.y
147 fld st(2) ; x.x, sqr(x.y), ln2/2, x.x, x.y
148 fmul st, st ; sqr(x.x), sqr(x.y), ln2/2, x.x, x.y
149 fadd ; mod, ln2/2, x.x, x.y
150 fyl2x ; z.x, x.x, x.y
151 fxch st(2) ; x.y, x.x, z.x
152 fxch ; x.x, x.y, z.x
153 cmp fpu, 387
154 jb Restricted
155
156 fpatan ; z.y, z.x
157 jmp StoreZX
158
159 Restricted:
160 mov bx, x
161 mov dh, BYTE PTR [bx+7]
162 or dh, dh
163 jns ChkYSign
164
165 fchs ; |x.x|, x.y, z.x
166
167 ChkYSign:
168 mov dl, BYTE PTR [bx+8+7]
169 or dl, dl
170 jns ChkMagnitudes
171
172 fxch ; x.y, |x.x|, z.x
173 fchs ; |x.y|, |x.x|, z.x
174 fxch ; |x.x|, |x.y|, z.x
175
176 ChkMagnitudes:
177 fcom st(1) ; x.x, x.y, z.x
178 fstsw Status ; x.x, x.y, z.x
179 test Status, 4500h
180 jz XisGTY
181
182 test Status, 4000h
183 jz XneY
184
185 fstp st ; x.y, z.x
186 fstp st ; z.x
187 fldpi ; Pi, z.x
188 fdiv _4_ ; Pi/4, z.x
189 jmp ChkSignZ
190
191 XneY:
192 fxch ; x.y, x.x, z.x
193 fpatan ; Pi/2 - Angle, z.x
194 fldpi ; Pi, Pi/2 - Angle, z.x
195 fdiv _2_ ; Pi/2, Pi/2 - Angle, z.x
196 fsubr ; Angle, z.x
197 jmp ChkSignZ
198
199 XisGTY:
200 fpatan ; Pi-Angle or Angle+Pi, z.x
201
202 ChkSignZ:
203 or dh, dh
204 js NegX
205
206 or dl, dl
207 jns StoreZX
208
209 fchs
210 jmp StoreZX
211
212 NegX:
213 or dl, dl
214 js QuadIII
215
216 fldpi ; Pi, Pi-Angle, z.x
217 fsubr ; Angle, z.x
218 jmp StoreZX
219
220 QuadIII:
221 fldpi ; Pi, Angle+Pi, z.x
222 fsub ; Angle, z.x
223
224 StoreZX:
225 mov bx, z
226 fstp QWORD PTR [bx+8] ; z.x
227 fstp QWORD PTR [bx] ;
228 ret
229 FPUcplxlog ENDP
230
231 FPUsinhcosh PROC x:word, sinh:word, cosh:word
232 LOCAL Control:word
233 fstcw Control
234 push Control ; Save control word on the stack
235 or Control, 0000110000000000b
236 fldcw Control ; Set control to round towards zero
237
238 mov Sign, 0 ; Assume the sign is positive
239 mov bx, x
240
241 fldln2 ; ln(2)
242 fdivr QWORD PTR [bx] ; x/ln(2)
243
244 cmp BYTE PTR [bx+7], 0
245 jns DuplicateX
246
247 fchs ; x = |x|
248
249 DuplicateX:
250 fld st ; x/ln(2), x/ln(2)
251 frndint ; int = integer(|x|/ln(2)), x/ln(2)
252 fxch ; x/ln(2), int
253 fsub st, st(1) ; rem < 1.0, int
254 fmul PointFive ; rem/2 < 0.5, int
255 ; CAE 7Dec92 changed above from divide by 2 to multiply by .5
256 f2xm1 ; (2**rem/2)-1, int
257 fadd _1_ ; 2**rem/2, int
258 fmul st, st ; 2**rem, int
259 fscale ; e**|x|, int
260 fstp st(1) ; e**|x|
261
262 cmp BYTE PTR [bx+7], 0
263 jns ExitFexp
264
265 fdivr _1_ ; e**x
266
267 ExitFexp:
268 fld st ; e**x, e**x
269 fdivr PointFive ; e**-x/2, e**x
270 fld st ; e**-x/2, e**-x/2, e**x
271 fxch st(2) ; e**x, e**-x/2, e**-x/2
272 fmul PointFive ; e**x/2, e**-x/2, e**-x/2
273 ; CAE 7Dec92 changed above from divide by 2 to multiply by .5
274 fadd st(2), st ; e**x/2, e**-x/2, cosh(x)
275 fsubr ; sinh(x), cosh(x)
276
277 mov bx, sinh ; sinh, cosh
278 fstp QWORD PTR [bx] ; cosh
279 mov bx, cosh
280 fstp QWORD PTR [bx] ;
281
282 pop Control
283 fldcw Control ; Restore control word
284 ret
285 FPUsinhcosh ENDP
286
287
288 FPUsincos PROC x:word, sinx:word, cosx:word
289 LOCAL Status:word
290 mov bx, x
291 fld QWORD PTR [bx] ; x
292
293 cmp fpu, 387
294 jb Use387FPUsincos
295
296 call _Loaded387sincos ; cos(x), sin(x)
297 mov bx, cosx
298 fstp QWORD PTR [bx] ; sin(x)
299 mov bx, sinx
300 fstp QWORD PTR [bx] ;
301 ret
302
303 Use387FPUsincos:
304
305 sub sp, 8 ; save 'x' on the CPU stack
306 mov bx, sp
307 fstp QWORD PTR [bx] ; FPU stack:
308
309 call cos
310
311 add sp, 8 ; take 'cos(x)' off the CPU stack
312 mov bx, ax
313 cmp compiled_by_turboc,0
314 jne turbo_c1
315
316 fld QWORD PTR [bx] ; FPU stack: cos(x)
317
318 turbo_c1:
319 fld st ; FPU stack: cos(x), cos(x)
320 fmul st, st ; cos(x)**2, cos(x)
321 fsubr _1_ ; sin(x)**2, cos(x)
322 fsqrt ; +/-sin(x), cos(x)
323
324 mov bx, x
325 fld QWORD PTR [bx] ; x, +/-sin(x), cos(x)
326 fldpi ; Pi, x, +/-sin(x), cos(x)
327 fadd st, st ; 2Pi, x, +/-sin(x), cos(x)
328 fxch ; |x|, 2Pi, +/-sin(x), cos(x)
329 fprem ; Angle, 2Pi, +/-sin(x), cos(x)
330 fstp st(1) ; Angle, +/-sin(x), cos(x)
331 fldpi ; Pi, Angle, +/-sin(x), cos(x)
332
333 cmp BYTE PTR [bx+7], 0
334 jns SignAlignedPi
335
336 fchs ; -Pi, Angle, +/-sin(x), cos(x)
337
338 SignAlignedPi:
339 fcompp ; +/-sin(x), cos(x)
340 fstsw Status ; +/-sin(x), cos(x)
341
342 mov ax, Status
343 and ah, 1
344 jz StoreSinCos ; Angle <= Pi
345
346 fchs ; sin(x), cos(x)
347
348 StoreSinCos:
349 mov bx, sinx
350 fstp QWORD PTR [bx] ; cos(x)
351 mov bx, cosx
352 fstp QWORD PTR [bx] ;
353 ret
354 FPUsincos ENDP
355
356
357 PUBLIC r16Mul
358 r16Mul PROC uses si di, x1:word, x2:word, y1:word, y2:word
359 mov si, x1
360 mov bx, x2
361 mov di, y1
362 mov cx, y2
363
364 xor ax, ax
365 shl bx, 1
366 jz Exitr16Mult ; Destination is zero
367
368 rcl ah, 1
369 shl cx, 1
370 jnz Chkr16Exp
371 xor bx, bx ; Source is zero
372 xor si, si
373 jmp Exitr16Mult
374
375 Chkr16Exp:
376 rcl al, 1
377 xor ah, al ; Resulting sign in ah
378 stc ; Put 'one' bit back into number
379 rcr bl, 1
380 stc
381 rcr cl, 1
382
383 sub ch, 127 ; Determine resulting exponent
384 add bh, ch
385 mov al, bh
386 mov fake_es, ax ; es has the resulting exponent and sign
387
388 mov ax, di
389 mov al, ah
390 mov ah, cl
391
392 mov dx, si
393 mov dl, dh
394 mov dh, bl
395
396 mul dx
397 mov cx, fake_es
398
399 shl ax, 1
400 rcl dx, 1
401 jnc Remr16MulOneBit ; 'One' bit is the next bit over
402
403 inc cl ; 'One' bit removed with previous shift
404 jmp Afterr16MulNorm
405
406 Remr16MulOneBit:
407 shl ax, 1
408 rcl dx, 1
409
410 Afterr16MulNorm:
411 mov bl, dh ; Perform remaining 8 bit shift
412 mov dh, dl
413 mov dl, ah
414 mov si, dx
415 mov bh, cl ; Put in the exponent
416 rcr ch, 1 ; Get the sign
417 rcr bx, 1 ; Normalize the result
418 rcr si, 1
419 Exitr16Mult:
420 mov ax, si
421 mov dx, bx
422 ret
423 r16Mul ENDP
424
425
426 PUBLIC RegFloat2Fg
427 RegFloat2Fg PROC x1:word, x2:word, Fudge:word
428 mov ax, WORD PTR x1
429 mov dx, WORD PTR x2
430 mov bx, ax
431 or bx, dx
432 jz ExitRegFloat2Fg
433
434 xor bx, bx
435 mov cx, bx
436
437 shl ax, 1
438 rcl dx, 1
439 rcl bx, 1 ; bx contains the sign
440
441 xchg cl, dh ; cx contains the exponent
442
443 stc ; Put in the One bit
444 rcr dl, 1
445 rcr ax, 1
446
447 sub cx, 127 + 23
448 add cx, Fudge
449 jz ChkFgSign
450 jns ShiftFgLeft
451
452 neg cx
453 ShiftFgRight:
454 shr dx, 1
455 rcr ax, 1
456 loop ShiftFgRight
457 jmp ChkFgSign
458
459 ShiftFgLeft:
460 shl ax, 1
461 rcl dx, 1
462 loop ShiftFgLeft
463
464 ChkFgSign:
465 or bx, bx
466 jz ExitRegFloat2Fg
467
468 not ax
469 not dx
470 add ax, 1
471 adc dx, 0
472
473 ExitRegFloat2Fg:
474 ret
475 RegFloat2Fg ENDP
476
477 PUBLIC RegFg2Float
478 RegFg2Float PROC x1:word, x2:word, FudgeFact:byte
479 mov ax, x1
480 mov dx, x2
481
482 mov cx, ax
483 or cx, dx
484 jz ExitFudgedToRegFloat
485
486 mov ch, 127 + 32
487 sub ch, FudgeFact
488 xor cl, cl
489 shl ax, 1 ; Get the sign bit
490 rcl dx, 1
491 jnc FindOneBit
492
493 inc cl ; Fudged < 0, convert to postive
494 not ax
495 not dx
496 add ax, 1
497 adc dx, 0
498
499 FindOneBit:
500 shl ax, 1
501 rcl dx, 1
502 dec ch
503 jnc FindOneBit
504 dec ch
505
506 mov al, ah
507 mov ah, dl
508 mov dl, dh
509 mov dh, ch
510
511 shr cl, 1 ; Put sign bit in
512 rcr dx, 1
513 rcr ax, 1
514
515 ExitFudgedToRegFloat:
516 ret
517 RegFg2Float ENDP
518
519
520 PUBLIC ExpFudged
521 ExpFudged PROC uses si, x_low:word, x_high:word, Fudge:word
522 LOCAL exp:WORD
523 xor ax, ax
524 mov WORD PTR Ans, ax
525 mov WORD PTR Ans + 2, ax
526 mov ax, WORD PTR x_low
527 mov dx, WORD PTR x_high
528 or dx, dx
529 js NegativeExp
530
531 div Ln2Fg16
532 mov exp, ax
533 or dx, dx
534 jz Raiseexp
535
536 mov ax, dx
537 mov si, dx
538 mov bx, 1
539
540 PosExpLoop:
541 add WORD PTR Ans, ax
542 adc WORD PTR Ans + 2, 0
543 inc bx
544 mul si
545 mov ax, dx
546 xor dx, dx
547 div bx
548 or ax, ax
549 jnz PosExpLoop
550
551 Raiseexp:
552 inc WORD PTR Ans + 2
553 mov ax, WORD PTR Ans
554 mov dx, WORD PTR Ans + 2
555 mov cx, -16
556 add cx, Fudge
557 add cx, exp
558 or cx, cx
559 jz ExitExpFudged
560 jns LeftShift
561 neg cx
562
563 RightShift:
564 shr dx, 1
565 rcr ax, 1
566 loop RightShift
567 jmp ExitExpFudged
568
569 NegativeExp:
570 not ax
571 not dx
572 add ax, 1
573 adc dx, 0
574 div Ln2Fg16
575 neg ax
576 mov exp, ax
577
578 or dx, dx
579 jz Raiseexp
580
581 mov ax, dx
582 mov si, dx
583 mov bx, 1
584
585 NegExpLoop:
586 sub WORD PTR Ans, ax
587 sbb WORD PTR Ans + 2, 0
588 inc bx
589 mul si
590 mov ax, dx
591 xor dx, dx
592 div bx
593 or ax, ax
594 jz Raiseexp
595
596 add WORD PTR Ans, ax
597 adc WORD PTR Ans + 2, 0
598 inc bx
599 mul si
600 mov ax, dx
601 xor dx, dx
602 div bx
603 or ax, ax
604 jnz NegExpLoop
605 jmp Raiseexp
606
607 LeftShift:
608 shl ax, 1
609 rcl dx, 1
610 loop LeftShift
611
612 ExitExpFudged:
613 ret
614 ExpFudged ENDP
615
616
617
618 PUBLIC LogFudged
619 LogFudged PROC uses si di, x_low:word, x_high:word, Fudge:word
620 LOCAL exp:WORD
621 xor bx, bx
622 mov cx, 16
623 sub cx, Fudge
624 mov ax, x_low
625 mov dx, x_high
626
627 or dx, dx
628 jz ChkLowWord
629
630 Incexp:
631 shr dx, 1
632 jz DetermineOper
633 rcr ax, 1
634 inc cx
635 jmp Incexp
636
637 ChkLowWord:
638 or ax, ax
639 jnz Decexp
640 jmp ExitLogFudged
641
642 Decexp:
643 dec cx ; Determine power of two
644 shl ax, 1
645 jnc Decexp
646
647 DetermineOper:
648 mov exp, cx
649 mov si, ax ; si =: x + 1
650 shr si, 1
651 stc
652 rcr si, 1
653 mov dx, ax
654 xor ax, ax
655 shr dx, 1
656 rcr ax, 1
657 shr dx, 1
658 rcr ax, 1 ; dx:ax = x - 1
659 div si
660
661 mov bx, ax ; ax, Fudged 16, max of 0.3333333
662 shl ax, 1 ; x = (x - 1) / (x + 1), Fudged 16
663 mul ax
664 shl ax, 1
665 rcl dx, 1
666 mov ax, dx ; dx:ax, Fudged 35, max = 0.1111111
667 mov si, ax ; si = (ax * ax), Fudged 19
668
669 mov ax, bx
670 ; bx is the accumulator, First term is x
671 mul si ; dx:ax, Fudged 35, max of 0.037037
672 mov fake_es, dx ; Save high word, Fudged (35 - 16) = 19
673 mov di, 0c000h ; di, 3 Fudged 14
674 div di ; ax, Fudged (36 - 14) = 21
675 or ax, ax
676 jz Addexp
677
678 mov cl, 5
679 shr ax, cl
680 add bx, ax ; bx, max of 0.345679
681 ; x = x + x**3/3
682
683 mov ax, fake_es ; ax, Fudged 19
684 mul si ; dx:ax, Fudged 38, max of 0.004115
685 mov fake_es, dx ; Save high word, Fudged (38 - 16) = 22
686 mov di, 0a000h ; di, 5 Fudged 13
687 div di ; ax, Fudged (38 - 13) = 25
688 or ax, ax
689 jz Addexp
690
691 mov cl, 9
692 shr ax, cl
693 add bx, ax
694 ; x = x + x**3/3 + x**5/5
695
696 mov ax, fake_es ; ax, Fudged 22
697 mul si ; dx:ax, Fudged 41, max of 0.0004572
698 mov di, 0e000h ; di, 7 Fudged 13
699 div di ; ax, Fudged (41 - 13) = 28
700 mov cl, 12
701 shr ax, cl
702 add bx, ax
703
704 Addexp:
705 shl bx, 1 ; bx *= 2, Fudged 16, max of 0.693147
706 ; x = 2 * (x + x**3/3 + x**5/5 + x**7/7)
707 mov cx, exp
708 mov ax, Ln2Fg16 ; Answer += exp * Ln2Fg16
709 or cx, cx
710 js SubFromAns
711
712 mul cx
713 add ax, bx
714 adc dx, 0
715 jmp ExitLogFudged
716
717 SubFromAns:
718 neg cx
719 mul cx
720 xor cx, cx
721 xchg cx, dx
722 xchg bx, ax
723 sub ax, bx
724 sbb dx, cx
725
726 ExitLogFudged:
727 ; x = 2 * (x + x**3/3 + x**5/5 + x**7/7) + (exp * Ln2Fg16)
728 ret
729 LogFudged ENDP
730
731
732
733
734 PUBLIC LogFloat14
735 LogFloat14 PROC x1:word, x2:word
736 mov ax, WORD PTR x1
737 mov dx, WORD PTR x2
738 shl ax, 1
739 rcl dx, 1
740 xor cx, cx
741 xchg cl, dh
742
743 stc
744 rcr dl, 1
745 rcr ax, 1
746
747 sub cx, 127 + 23
748 neg cx
749 push cx
750 push dx
751 push ax
752 call LogFudged
753 add sp, 6
754 ret
755 LogFloat14 ENDP
756
757
758 PUBLIC RegSftFloat
759 RegSftFloat PROC x1:word, x2:word, Shift:byte
760 mov ax, x1
761 mov dx, x2
762
763 shl dx, 1
764 rcl cl, 1
765
766 add dh, Shift
767
768 shr cl, 1
769 rcr dx, 1
770
771 ret
772 RegSftFloat ENDP
773
774
775
776
777 PUBLIC RegDivFloat
778 RegDivFloat PROC uses si di, x1:word, x2:word, y1:word, y2:word
779 mov si, x1
780 mov bx, x2
781 mov di, y1
782 mov cx, y2
783
784 xor ax, ax
785 shl bx, 1
786 jnz ChkOtherOp
787 jmp ExitRegDiv ; Destination is zero
788
789 ChkOtherOp:
790 rcl ah, 1
791 shl cx, 1
792 jnz ChkDivExp
793 xor bx, bx ; Source is zero
794 xor si, si
795 jmp ExitRegDiv
796
797 ChkDivExp:
798 rcl al, 1
799 xor ah, al ; Resulting sign in ah
800 stc ; Put 'one' bit back into number
801 rcr bl, 1
802 stc
803 rcr cl, 1
804
805 sub ch, 127 ; Determine resulting exponent
806 sub bh, ch
807 mov al, bh
808 mov fake_es, ax ; es has the resulting exponent and sign
809
810 mov ax, si ; 8 bit shift, bx:si moved to dx:ax
811 mov dh, bl
812 mov dl, ah
813 mov ah, al
814 xor al, al
815
816 mov bh, cl ; 8 bit shift, cx:di moved to bx:cx
817 mov cx, di
818 mov bl, ch
819 mov ch, cl
820 xor cl, cl
821
822 shr dx, 1
823 rcr ax, 1
824
825 div bx
826 mov si, dx ; Save (and shift) remainder
827 mov dx, cx ; Save the quess
828 mov cx, ax
829 mul dx ; Mult quess times low word
830 xor di, di
831 sub di, ax ; Determine remainder
832 sbb si, dx
833 mov ax, di
834 mov dx, si
835 jc RemainderNeg
836
837 xor di, di
838 jmp GetNextDigit
839
840 RemainderNeg:
841 mov di, 1 ; Flag digit as negative
842 not ax ; Convert remainder to positive
843 not dx
844 add ax, 1
845 adc dx, 0
846
847 GetNextDigit:
848 shr dx, 1
849 rcr ax, 1
850 div bx
851 xor bx, bx
852 shl dx, 1
853 rcl ax, 1
854 rcl bl, 1 ; Save high bit
855
856 mov dx, cx ; Retrieve first digit
857 or di, di
858 jz RemoveDivOneBit
859
860 neg ax ; Digit was negative
861 neg bx
862 dec dx
863
864 RemoveDivOneBit:
865 add dx, bx
866 mov cx, fake_es
867 shl ax, 1
868 rcl dx, 1
869 jc AfterDivNorm
870
871 dec cl
872 shl ax, 1
873 rcl dx, 1
874
875 AfterDivNorm:
876 mov bl, dh ; Perform remaining 8 bit shift
877 mov dh, dl
878 mov dl, ah
879 mov si, dx
880 mov bh, cl ; Put in the exponent
881 shr ch, 1 ; Get the sign
882 rcr bx, 1 ; Normalize the result
883 rcr si, 1
884
885 ExitRegDiv:
886 mov ax, si
887 mov dx, bx
888 ret
889 RegDivFloat ENDP
890
891
892
893 Term equ
894 Num equ
895 Factorial equ
896 Sin equ
897 Cos equ
898 e equ
899 Inve equ
900
901 SinCos086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinAddr:WORD, \
902 CosAddr:WORD
903 mov ax, LoNum
904 mov dx, HiNum
905
906 xor cx, cx
907 ; mov SinNeg, cx ;CJLT - not needed now
908 ; mov CosNeg, cx ;CJLT - not needed now
909 mov a, cx ;CJLT - Not needed by the original code, but it
910 ; is now!
911 or dx, dx
912 jns AnglePositive
913
914 not ax
915 not dx
916 add ax, 1
917 adc dx, cx ;conveniently zero
918 mov a,8 ;a now has 4 bits: Sign+quadrant+octant
919
920 AnglePositive:
921 mov si, ax
922 mov di, dx
923 mul WORD PTR InvPiFg33
924 mov bx, dx
925 mov ax, di
926 mul WORD PTR InvPiFg33
927 add bx, ax
928 adc cx, dx
929 mov ax, si
930 mul WORD PTR InvPiFg33 + 2
931 add bx, ax
932 adc cx, dx
933 mov ax, di
934 mul WORD PTR InvPiFg33 + 2
935 add ax, cx
936 adc dx, 0
937
938 and dx, 3 ;Remove multiples of 2 pi
939 shl dx, 1 ;move over to make space for octant number
940 ;
941 ;CJLT - new code to reduce angle to: 0 <= angle <= 45
942 ;
943 or ax, ax
944 jns Lessthan45
945 inc dx ;octant number
946 not ax ;angle=90-angle if it was >45 degrees
947 Lessthan45:
948 add a, dx ;remember octant and Sign in a
949 mov Num, ax ;ax=Term, now
950 ;
951 ; We do the Taylor series with all numbers scaled by pi/2
952 ; so InvPiFg33 represents one. Truly.
953 ;
954 mov Factorial, WORD PTR InvPiFg33 + 2
955 mov one, Factorial
956 mov Cos, Factorial ; Cos = 1
957 mov Sin, Num ; Sin = Num
958
959 LoopIntSinCos:
960 TaylorTerm ; ax=Term = Num * (Num/2) * (Num/3) * . . .
961 sub Cos, Term ; Cos = 1 - Num*(Num/2) + (Num**4)/4! - . . .
962 cmp Term, WORD PTR TrigLimit
963 jbe SHORT ExitIntSinCos
964
965 TaylorTerm
966 sub Sin, Term ; Sin = Num - Num*(Num/2)*(Num/3) + (Num**5)/5! - . . .
967 cmp Term, WORD PTR TrigLimit
968 jbe SHORT ExitIntSinCos
969
970 TaylorTerm
971 add Cos, Term
972 cmp Term, WORD PTR TrigLimit
973 jbe SHORT ExitIntSinCos
974
975 TaylorTerm ; Term = Num * (x/2) * (x/3) * . . .
976 add Sin, Term
977 cmp Term, WORD PTR TrigLimit
978 jnbe LoopIntSinCos
979
980 ExitIntSinCos:
981 xor ax, ax
982 mov cx, ax
983 cmp Cos, WORD PTR InvPiFg33 + 2
984 jb CosDivide ; Cos < 1.0
985
986 inc cx ; Cos == 1.0
987 jmp StoreCos
988
989 CosDivide:
990 mov dx, Cos
991 div WORD PTR InvPiFg33 + 2
992
993 StoreCos:
994 mov Cos, ax ; cx:Cos
995
996 xor ax, ax
997 mov bx, ax
998 cmp Sin, WORD PTR InvPiFg33 + 2
999 jb SinDivide ; Sin < 1.0
1000
1001 inc bx ; Sin == 1.0
1002 jmp StoreSin
1003
1004 SinDivide:
1005 mov dx, Sin
1006 div WORD PTR InvPiFg33 + 2
1007
1008 StoreSin:
1009 mov Sin, ax ; bx:Sin
1010 ; CJLT again. New tests are needed to correct signs and exchange sin/cos values
1011 mov ax, a
1012 inc al ;forces bit 1 to xor of previous bits 0 and 1
1013 test al, 2
1014 jz ChkNegCos
1015
1016 xchg cx, bx
1017 xchg Sin, Cos
1018 ; mov ax, SinNeg commented out by CJLT. This was a bug.
1019 ; xchg ax, CosNeg
1020 ; mov CosNeg, ax and this was meant to be mov SinNeg,ax
1021
1022 ChkNegCos:
1023 inc al ;forces bit 2 to xor of original bits 1 and 2
1024 test al, 4
1025 jz ChkNegSin
1026
1027 not Cos ;negate cos if quadrant 2 or 3
1028 not cx
1029 add Cos, 1
1030 adc cx, 0
1031
1032 ChkNegSin:
1033 inc al
1034 inc al ;forces bit 3 to xor of original bits 2 and 3
1035 test al, 8
1036 jz CorrectQuad
1037
1038 not Sin
1039 not bx
1040 add Sin, 1
1041 adc bx, 0
1042
1043 CorrectQuad:
1044
1045 CosPolarized:
1046 mov dx, bx
1047 mov bx, CosAddr
1048 mov WORD PTR [bx], Cos
1049 mov WORD PTR [bx+2], cx
1050
1051 SinPolarized:
1052 mov bx, SinAddr
1053 mov WORD PTR [bx], Sin
1054 mov WORD PTR [bx+2], dx
1055 ret
1056 SinCos086 ENDP
1057
1058
1059 PUBLIC ArcTan086
1060 ;
1061 ;Used to calculate logs of complex numbers, since log(R,theta)=log(R)+i.theta
1062 ; in polar coordinates.
1063 ;
1064 ;In C we need the prototype:
1065 ;long ArcTan086(long, long)
1066
1067 ;The parameters are x and y, the returned value arctan(y/x) in the range 0..2pi.
1068 ArcTan086 PROC uses si di, x1:word, x2:word, y1:word, y2:word
1069 xor ax, ax ;ax=0
1070 mov si, x1 ;Lo
1071 mov bx, x2 ;Hi
1072 or bx, bx ;Sign set ?
1073 jns xplus
1074 inc ax
1075 not si
1076 not bx
1077 add bx, 1
1078 adc si, 0 ;We have abs(x) now
1079 xplus:
1080 mov di, y1 ;Lo
1081 mov cx, y2 ;Hi
1082 or cx, cx ;Sign set?
1083 jns yplus
1084 inc ax
1085 inc ax ;Set bit 1 of ax
1086 shl ax, 1 ; and shift it all left
1087 not di
1088 not cx
1089 add di, 1
1090 adc cx, 0 ;We have abs(y) now
1091 yplus:
1092 cmp bx, cx ;y>x?
1093 jl no_xchg
1094 jg xchg_xy
1095 cmp si, di ;Hi words zero. What about lo words?
1096 jle no_xchg
1097 xchg_xy: ;Exchange them
1098 inc ax ;Flag the exchange
1099 xchg si, di
1100 xchg bx, cx
1101 no_xchg:
1102 mov SinNeg, ax ;Remember signs of x and y, and whether exchanged
1103 or cx, cx ;y=1.0 ?
1104 jz ynorm ;no, so continue
1105 normy: ;yes, normalise by reducing y to 16 bits max.
1106 rcr cx, 1 ; (We don't really lose any precision)
rcr di, 1
clc
rcr bx, 1
rcr si, 1
or cx, cx
jnz normy
ynorm:
ret
ArcTan086 ENDP
;
;There now follows Chris Lusby Taylor's novel (such modesty!) method
1107 ;of calculating exp(x). Originally, I was going to do it by decomposing x
1108 ;into a sum of terms of the form:
1109 ;
1110 ; th -i
1111 ; i term=ln(1+2 )
1112 ; -i
1113 ;If x>term[i] we subtract term[i] from x and multiply the answer by 1 + 2
1114 ;
1115 ;We can multiply by that factor by shifting and adding. Neat eh!
1116 ;
1117 ;Unfortunately, however, for 16 bit accuracy, that method turns out to be
1118 ;slower than what follows, which is also novel. Here, I first divide x not
1119 ;by ln(2) but ln(2)/16 so that we can look up an initial answer in a table of
1120 ;2^(n/16). This leaves the remainder so small that the polynomial Taylor series
1121 ;converges in just 1+x+x^2/2 which is calculated as 1+x(1+x/2).
1122 ;
1123 _e2xLT PROC ;argument in dx.ax (bitshift=16 is hard coded here)
1124 or dx, dx
1125 jns CalcExpLT
1126
1127 not ax ; if negative, calc exp(abs(x))
1128 not dx
1129 add ax, 1
1130 adc dx, 0
1131
1132 CalcExpLT:
1133 shl ax, 1
1134 rcl dx, 1
1135 shl ax, 1
1136 rcl dx, 1
1137 shl ax, 1
1138 rcl dx, 1 ;x is now in fg19 form for accuracy
1139 ; so, relative to fg16, we have:
1140 div Ln2Fg15 ; 8x==(a * Ln(2)/2) + Rem
1141 ; x=(a * Ln(2)/16) + Rem/8
1142 ;so exp(x)=exp((a * Ln(2)/16) + Rem/8)
1143 ; =exp(a/16 * Ln(2)) * exp(Rem/8)
1144 ; =2^(a/16) * exp(Rem)
1145 ;a mod 16 will give us an initial estimate
1146 mov cl, 4
1147 mov di, ax ;Remember original
1148 shr ax, cl
1149 mov a, ax ;a/16 will give us a bit shift
1150 mov ax, di
1151 and ax, 0000fh ;a mod 16
1152 shl ax, 1 ;used as an index into 2 byte per element Table
1153 mov di, ax
1154 dec cx ;3, please
1155 add dx, 4 ;to control rounding up/down
1156 shr dx, cl ;Num=Rem/8 (convert back to fg16)
1157 ;
1158 mov ax, dx
1159 shr ax, 1 ;Num/2 fg 16
1160 inc ax ;rounding control
1161 stc
1162 rcr ax, 1 ;1+Num/2 fg15
1163 mul dx ;dx:ax=Num(1+Num/2) fg31, so dx alone is fg15
1164 shl ax, 1 ;more rounding control
1165 adc dx, 8000H ;dx=1+Num(1+Num/2) fg15
1166 mov ax, Table[di] ;Get table entry fg15
1167 mul dx ;Only two multiplys used! fg14, now (15+15-16)
1168 shl ax, 1
1169 rcl dx, 1 ;fg15
1170 mov e, dx
1171 ret ; return e=e**x * (2**15), 1 < e**x < 2
1172 _e2xLT ENDP ; a= bitshift needed
1173
1174
1175
1176 Exp086 PROC uses si di, LoNum:WORD, HiNum:WORD
1177 mov ax, LoNum
1178 mov dx, HiNum
1179 ; mov overflow, 0
1180
1181 call _e2xLT ;Use Chris Lusby Taylor's e2x
cmp a, 15 ;CJLT - was 16
jae Overflow
; cmp expSign, 0
; jnz NegNumber
cmp HiNum, 0 ;CJLT - we don't really need expSign
1182 jl NegNumber
1183
1184 mov ax, e
1185 mov dx, ax
1186 inc a
1187 mov cx, 16
1188 sub cx, a
1189 shr dx, cl
1190 mov cx, a
1191 shl ax, cl
1192 jmp ExitExp086
1193
1194 Overflow:
1195 xor ax, ax
1196 xor dx, dx
1197 mov overflow, 1
1198 jmp ExitExp086
1199
1200 NegNumber:
1201 cmp e, 8000h
1202 jne DivideE
1203
1204 mov ax, e
1205 dec a
1206 jmp ShiftE
1207
1208 DivideE:
1209 xor ax, ax
1210 mov dx, ax
1211 stc
1212 rcr dx, 1
1213 div e
1214
1215 ShiftE:
1216 xor dx, dx
1217 mov cx, a
1218 shr ax, cl
1219
1220 ExitExp086:
1221 ret
1222 Exp086 ENDP
1223
1224
1225
1226 SinhCosh086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinhAddr:WORD, \
1227 CoshAddr:WORD
1228 mov ax, LoNum
1229 mov dx, HiNum
1230
1231 call _e2xLT ;calculate exp(|x|) fg 15
1232 ;answer is e*2^a
1233 cmp e, 8000h ;e==1 ?
1234 jne InvertE ; e > 1, so we can invert it.
1235
1236 mov dx, 1
1237 xor ax, ax
1238 cmp a, 0
1239 jne Shiftone
1240
1241 mov e, ax
1242 mov cx, ax
1243 jmp ChkSinhSign
1244
1245 Shiftone:
1246 mov cx, a
1247 shl dx, cl
1248 dec cx
1249 shr e, cl
1250 shr dx, 1
1251 shr e, 1
1252 mov cx, dx
1253 sub ax, e
1254 sbb dx, 0
1255 xchg ax, e
1256 xchg dx, cx
1257 jmp ChkSinhSign
1258
1259 InvertE:
1260 xor ax, ax ; calc 1/e
1261 mov dx, 8000h
1262 div e
1263
1264 mov Inve, ax
1265
1266 ShiftE:
1267 mov cx, a
1268 shr Inve, cl
1269 inc cl
1270 mov dx, e
1271 shl e, cl
1272 neg cl
1273 add cl, 16
1274 shr dx, cl
1275 mov cx, dx ; cx:e == e**Exp
1276
1277 mov ax, e ; dx:e == e**Exp
1278 add ax, Inve
1279 adc dx, 0
1280 shr dx, 1
1281 rcr ax, 1 ; cosh(Num) = (e**Exp + 1/e**Exp) / 2
1282
1283 sub e, Inve
1284 sbb cx, 0
1285 sar cx, 1
1286 rcr e, 1
1287
1288 ChkSinhSign:
1289 or HiNum, 0
1290 jns StoreHyperbolics
1291
1292 not e
1293 not cx
1294 add e, 1
1295 adc cx, 0
1296
1297 StoreHyperbolics:
1298 mov bx, CoshAddr
1299 mov WORD PTR [bx], ax
1300 mov WORD PTR [bx+2], dx
1301
1302 mov bx, SinhAddr
1303 mov WORD PTR [bx], e
1304 mov WORD PTR [bx+2], cx
1305
1306 ret
1307 SinhCosh086 ENDP
1308
1309
1310
1311 Log086 PROC uses si di, LoNum:WORD, HiNum:WORD, Fudge:WORD
1312 LOCAL Exp:WORD ; not used, Accum:WORD, LoAns:WORD, HiAns:WORD
1313 ;NOTE: CJLT's method does not need LoAns, HiAns, but he hasn't yet
1314 ;taken them out
1315 xor bx, bx
1316 mov cx, Fudge
1317 mov ax, LoNum
1318 mov dx, HiNum
1319 ; mov overflow, 0
1320
1321 or dx, dx
1322 js Overflow
1323 jnz ShiftUp
1324
1325 or ax, ax
1326 jnz ShiftUp
1327
1328 Overflow:
1329 mov overflow, 1
1330 jmp ExitLog086
1331
1332 ShiftUp:
1333 inc cx ; cx = Exp
1334 shl ax, 1
1335 rcl dx, 1
1336 or dx, dx
1337 jns ShiftUp ;shift until dx in fg15 form
1338
1339 neg cx
1340 add cx, 31
1341 mov Exp, cx
1342 ;CJLT method starts here. We try to reduce x to make it very close to 1
1343 ;store LoAns in bx for now (fg 16; initially 0)
1344 mov cl,2 ;shift count
1345 redoterm2:
1346 cmp dx, 0AAABH ;x > 4/3 ?
1347 jb doterm3
1348 mov ax, dx
1349 shr ax, cl
1350 sub dx, ax ;x:=x(1-1/4)
1351 add bx, 49a5H ;ln(4/3) fg 15
1352 jmp redoterm2
1353 doterm3:
1354 inc cl ;count=3
1355 redoterm3:
1356 cmp dx, 9249H ;x > 8/7 ?
1357 jb doterm4
1358 mov ax, dx
1359 shr ax, cl
1360 sub dx, ax ;x:=x(1-1/8)
1361 add bx, 222eH ;ln(8/7)
1362 jmp redoterm3
1363 doterm4:
1364 inc cl ;count=4
1365 cmp dx, 8889H ;x > 16/15 ?
1366 jb skipterm4
1367 mov ax, dx
1368 shr ax, cl
1369 sub dx, ax ;x:=x(1-1/16)
1370 add bx, 1085h ;ln(16/15)
1371 ;No need to retry term4 as error is acceptably low if we ignore it
1372 skipterm4:
1373 ;Now we have reduced x to the range 1 <=x <1.072
1374 ;
1375 ;Now we continue with the conventional series, but x is so small we
1376 ;can ignore all terms except the first!
1377 ;i.e.:
1378 ;ln(x)=2(x-1)/(x+1)
1379 sub dx, 8000h ; dx= x-1, fg 15
1380 mov cx, dx
1381 stc
1382 rcr cx, 1 ; cx = 1 + (x-1)/2 fg 15
1383 ; = 1+x fg 14
1384 mov ax,4000H ;rounding control (Trust me)
1385 div cx ;ax=ln(x)
1386 add bx, ax ;so add it to the rest of the Ans. No carry
1387 MultExp:
1388 mov cx, Exp
1389 mov ax, Ln2Fg16
1390 or cx, cx
1391 js SubFromAns
1392
1393 mul cx ; cx = Exp * Lg2Fg16, fg 16
1394 add ax, bx ;add bx part of answer
1395 adc dx, 0
1396 jmp ExitLog086
1397
1398 SubFromAns:
1399 inc bx ;Somewhat artificial, but we need to add 1 somewhere
1400 neg cx
1401 mul cx
1402 not ax
1403 not dx
1404 add ax, bx
1405 adc dx, 0
1406
1407 ExitLog086:
1408 ret
1409 Log086 ENDP
1410
1411
1412 END
1413