i am trying to write a factorial function,but the fpu stack
is driving me nuts.
let's say that St(0)=x
ecx=x
how do you write a procedure such that we keep the partial factorial in
the fpu stack and also the cummulative x=x-1 and have the final factorial in St(0)?
any help is appreciated.
Posted on 2003-08-31 19:16:30 by jack
this works, is there a better way?
``````
fld1
cmp ecx,0
je fac3
sub ecx,1
jz fac3
fac2:
fmul st0,st1
fld st1
fld1
fsubp st1,st0
fstp st2
sub ecx,1
jnz fac2
fac3:
fstp st1
fstp st1
``````
Posted on 2003-08-31 20:15:04 by jack
HERE is an integer table method - could be converted to FPU.
Posted on 2003-08-31 20:28:51 by bitRAKE
a table lookup is a good idea.
i wrote a function for the gamma function that works quite well for positive arguments,
you loose some precision for negative arguments.
it's written in fasm style.
``````
;fasm style

; gamma(x + 1) = (x + Y + 1/2)^(x + 1/2)*exp(-(x + Y + 1/2))
; *sqrt(2*Pi)*(C0 + C1/(x + 1) + C2/(x + 2) +...+ CN/(x + N))
;
; ebx holds the address of 'x'

fld tword [120+gamma]   ; 9.5
fld st0                 ;make copy
fld tword [ebx]         ;load x again
fxch                    ;exchange st0 and st1: st0 = x + 9.5, st1 = x + .5
FYL2X                   ;st0 = st0 ^ st1
FLD st0                 ; "
FRNDINT                 ; "
FSUB st1, st0           ; "
FLD1                    ; "
FSCALE                  ; "
FXCH                    ; "
FXCH st2                ; "
F2XM1                   ; "
FLD1                    ; "
FMULP st1, st0          ; "
fstp st1                ; clean up fpu stack, result in st0
fxch                    ;exchange st0 and st1: st0 = x + 9.5, st1 = (x + 9.5) ^ (x + .5)
fchs                    ;st0 = - st0 = -(x + 9.5)
fld tword [rx_e]        ;st0 = exp(st0)
FYL2X                   ; "
FLD st0                 ; "
FRNDINT                 ; "
FSUB st1, st0           ; "
FLD1                    ; "
FSCALE                  ; "
FXCH                    ; "
FXCH st2                ; "
F2XM1                   ; "
FLD1                    ; "
FMULP st1, st0          ; "
fstp st1                ; clean up fpu stack, result in st0
fmulp st1,st0           ;st0 = (x + 9.5) ^ (x + .5) * exp(-(x + 9.5))
fld tword [gamma]       ; 2.50662827463100050  ; Sqrt(2*Pi)
fmulp st1,st0           ;st0 = (x + 9.5) ^ (x + .5) * exp(-(x + 9.5)) * Sqrt(2*Pi)
fld tword [gamma+10]    ;1.00000000000000017
fld tword [ebx]         ;load x again
fiadd dword [ten]       ;st0 = x + 10
fld tword [110+gamma]   ;-4.02353314126823637e-9
fdiv st0,st1            ;st0 = -4.02353314126823637e-9 / (x + 10)
fld1
fsubp st1,st0           ;st0 = x + 9
fld tword [100+gamma]   ; 5.38413643250956406e-8
fdiv st0,st1
fld1
fsubp st1,st0           ;st0 = x + 8
fld tword [90+gamma]    ;-7.42345251020141615e-3
fdiv st0,st1
fld1
fsubp st1,st0           ;st0 = x + 7
fld tword [80+gamma]    ; 2.60569650561175583
fdiv st0,st1
fld1
fsubp st1,st0           ;st0 = x + 6
fld tword [70+gamma]    ;-108.176705351436963
fdiv st0,st1
fld1
fsubp st1,st0           ;st0 = x + 5
fld tword [60+gamma]    ; 1301.60828605832187
fdiv st0,st1
fld1
fsubp st1,st0           ;st0 = x + 4
fld tword [50+gamma]    ;-6348.16021764145881
fdiv st0,st1
fld1
fsubp st1,st0           ;st0 = x + 3
fld tword [40+gamma]    ; 14291.4927765747855
fdiv st0,st1
fld1
fsubp st1,st0           ;st0 = x + 2
fld tword [30+gamma]    ;-14815.3042676841391
fdiv st0,st1
fld1
fsubp st1,st0           ;st0 = x + 1
fld tword [20+gamma]    ; 5716.40018827434138
fdivrp st1,st0
fmulp st1,st0
fstp st1
;st0 = gamma(x + 1)

gamma:
;N=10,Y=9
dw \$2CB2,\$B138,\$98FF,\$A06C,\$4000    ;   2.50662827463100050  ; Sqrt(2*Pi)  ;    gamma
dw \$064A,\$0000,\$0000,\$8000,\$3FFF    ;   1.00000000000000017 ______________ ; 10+gamma
dw \$4FAA,\$E8F4,\$3395,\$B2A3,\$400B    ;   5716.40018827434138 ______________ ; 20+gamma
dw \$6D9E,\$F2A2,\$3791,\$E77D,\$C00C    ;  -14815.3042676841391 ______________ ; 30+gamma
dw \$C153,\$6C23,\$F89A,\$DF4D,\$400C    ;   14291.4927765747855 ______________ ; 40+gamma
dw \$767D,\$2FD2,\$4820,\$C661,\$C00B    ;  -6348.16021764145881 ______________ ; 50+gamma
dw \$5DC8,\$52E3,\$7714,\$A2B3,\$4009    ;   1301.60828605832187 ______________ ; 60+gamma
dw \$5F26,\$B2E6,\$791F,\$D85A,\$C005    ;  -108.176705351436963 ______________ ; 70+gamma
dw \$AC57,\$B9DA,\$BB46,\$A6C3,\$4000    ;   2.60569650561175583 ______________ ; 80+gamma
dw \$5E13,\$9ACD,\$6EE0,\$F340,\$BFF7    ;  -7.42345251020141615e-3 ___________ ; 90+gamma
dw \$16EB,\$FC65,\$34C4,\$E73F,\$3FE6    ;   5.38413643250956406e-8 ___________ ;100+gamma
dw \$B1AB,\$8882,\$5F2D,\$8A3F,\$BFE3    ;  -4.02353314126823637e-9 ___________ ;110+gamma
dw \$0000,\$0000,\$0000,\$9800,\$4002    ;   9.5 ______________________________ ;120+gamma
rx_half:
dw \$0000,\$0000,\$0000,\$8000,\$3FFE
rx_e:
ten: dd 10
``````

i must admit, i get lost with the fpu stack.
Posted on 2003-08-31 21:09:37 by jack
When you have a complex computation such as your gamma example, you simply have to do a lot of pre-planning to define the sequence in which you will perform the various operations. If it seems impossible to keep all intermediate results on the stack, then you have to keep some in memory.

Then, you should always keep track of which data is in which register whenever you load or pop data.

As for your factorial function, you only need 3 registers. That should thus be easy.

Assuming the FPU has already been initialized with the FINIT instruction, it will perform all computations in REAL10 mode (if you don't initialize it, Windows would have set it up for REAL8 mode). Also assuming that the FPU is "clean" and that ECX holds a valid "n" for n!,
``````
push ecx             ;will be used to load "n" in FPU
fld1                 ;st=1
or   ecx,ecx
jz   finish          ;by convention, 0!=1
fld  st              ;st=1, st1=1
fild dword ptr[esp]  ;st=n, st1=1, st2=1
;st2 will be used for the running n!
;st1 will be used to decrement "n"
@@:
dec  ecx             ;decrement counter
jz   clean_up
fmul st(2),st
fsub st,st(1)        ;decrement the "n"
jmp  @B

clean_up:
fcompp               ;pops both the "n" and the "1" constant
;leaving only n! on FPU

finish:
pop  ecx             ;clean up the CPU stack
;returns answer in FPU top register
``````

When I look at your code, you would discard the result and cause an invalid operation when ECX=0. You are also loading and discarding the "1" constant in each loop.

Raymond
Posted on 2003-08-31 23:41:07 by Raymond
Maybe,
``````	push	ecx		; will be used to load "n" in FPU
fld1			; st=1
fld1			; st=1, st1=1
fild	dword ptr [esp]	; st=n, st1=1, st2=1
; st2 will be used for the running n!
; st1 will be used to decrement "n"

jmp	_2

align 8

_1:	fmul	st(2), st
fsub	st, st(1)	; decrement the "n"
_2:	dec	ecx		; decrement counter
jnle	_1

fcompp			; pops both the "n" and the "1" constant
; leaving only n! on FPU
pop	ecx		; clean up the CPU stack
; returns answer in FPU top register``````
No need to multiply by one and all jumps predicated except one. :)

Can you imagine someone trying to copywrite such a logical solution?
Posted on 2003-09-01 00:10:45 by bitRAKE