; ufcomp var1,var2
; Branch-free ordered compare of two 32-bit float bit patterns using only
; integer instructions. Follow it with signed conditional jumps
; (jl, jg, jle, jge, jne, jz).
;   var1, var2 : 32-bit operands usable as the source of a mov. var2 must not
;                be eax, because eax is overwritten before var2 is read.
;   Result     : flags of a signed cmp between the adjusted values.
;   Clobbers   : eax, ebx, ecx, flags. ebx is overwritten and not restored;
;                callers that need it must save it themselves.
ufcomp macro var1,var2
mov eax,var1    ;; eax = bit pattern of var1
mov ebx,var2    ;; ebx = bit pattern of var2
mov ecx,eax
and ecx,ebx     ;; bit 31 of ecx is set only when both sign bits are set
sar ecx,31      ;; replicate bit 31: ecx = FFFFFFFFh (both negative) or 0
xor eax,ecx     ;; invert every bit of both values when both are negative,
xor ebx,ecx     ;; otherwise leave them unchanged
cmp eax,ebx     ;; signed integer compare of the adjusted values
endm

After the macro, use a conditional jump (jxx) as if you had compared two signed integers. The only exception is that -0.0 compares as less than 0.0.


speed test:



; Timing harness: runs the loop body 1,000,000,000 times and prints the
; difference between the GetTickCount values taken after and before the loop.
; Uncomment exactly one of the two lines inside the loop to time that macro.
invoke GetTickCount
push eax                ; save the starting tick count on the stack
mov ecx,1000000000
push ecx                ; loop counter is kept at [esp], not in a register (ufcomp overwrites eax, ebx and ecx)
@@:
;-----------------\
; fcmp ke1,ke2 ; 4276 milliseconds // macro, from the asm DX sdk
;ufcomp ke1,ke2 ; 3765 milliseconds // my macro
;-----------------/
dec dword ptr[esp]      ; count down directly in memory
jnz @B                  ; repeat until the counter reaches zero
pop ecx                 ; discard the exhausted counter

invoke GetTickCount
pop edx                 ; edx = starting tick count
sub eax,edx             ; eax = elapsed ticks
print eax

With both lines commented out (an empty loop body), the loop takes 3355 milliseconds, so for 1 billion executions fcmp actually takes 921 ms while ufcomp takes 410 ms.
That is a 124% speedup, with ufcomp taking about 0.7 cycles per execution.
This is on an AthlonXP2000+ (DDR 400MHz)

How it works:
A negative float has the same 31 least significant bits as its absolute value (its positive counterpart). The most significant bit is set if the number is negative.
Case 1: If both var1 and var2 are positive, then a normal cmp can be used - for fastest results. But if the two floats can have any value - we first see if they're both negative.
Case 2: only one of them is negative. We can still use a normal cmp
Case 3: both are negative - then cmp should have inverse logic. But we won't do any jumps around, so we do this: after the fifth instruction of the macro (sar ecx,31), ecx is either 0 or FFFFFFFFh. It's the latter only if both numbers were negative. Then we xor both values with ecx, which inverts all their bits when ecx is FFFFFFFFh and therefore reverses their order, and finally compare them.
Thus all 3 cases get covered by non-branching code, and this on my AthlonXP runs in 0.7 cycle ^_^ . Improvements are welcome
Posted on 2004-11-29 17:40:20 by Ultrano
nice - but shouldn't float comparisons be "is difference between numbers reasonably small" rather than equality, because of the imprecisions of floats? (me stupid)
Posted on 2004-11-29 19:09:05 by f0dder
With the floating point compare instruction you'll get the same result anyway, it's just that this is faster. And if the coder wants little precision, he's better off converting both floats to long.
with jxx I mean: jl, jg, jle, jge, jne, jz

; Example: three-way comparison of two real4 values with ufcomp.
; The two variables below belong in the program's data section.
test1 real4 3.3
test2 real4 -17.5

; Every conditional jump is taken directly after ufcomp, before any print
; runs. Exactly one message is shown, and the outcome does not depend on
; whether print preserves the flags.
    ufcomp test1,test2
    je  uf_equal                ; test1 == test2
    jl  uf_lower                ; test1 <  test2 (signed compare of the adjusted bits)
    print "test1 is greater"    ; neither equal nor lower
    jmp uf_done
uf_equal:
    print "are equal"
    jmp uf_done
uf_lower:
    print "test1 is lower"
uf_done:
Posted on 2004-11-29 22:42:43 by Ultrano
It seems to work!
Can you explain what's the effect on a float when its binary representation is reverted... (last 2 xor sentences)

Regards :)

Biterider
Posted on 2004-11-30 00:49:12 by Biterider
If the most significant bit of both floats is 1 (they're negative), when we negate the other bits we get a reverse result in the comparison. Uh can't explain things really well, but it's really similar to negating two integers and then comparing them. Only bits 31 and 30 are kind of special, but they get removed when comparing both negatives.
Posted on 2004-11-30 03:58:33 by Ultrano
I explore the possibility for a more flexible register usage.

; ufcomp var1, var2 (flexible-register version)
; Same compare sequence as the fixed-register ufcomp, but the working
; registers are chosen at assembly time:
;   reg1 / reg2 : var1 / var2 themselves when opattr reports a register,
;                 otherwise a register picked from eax/ecx/edx that the value
;                 is loaded into with mov.
;   reg3        : scratch register for the sign mask, the first of
;                 eax, ecx, edx that is neither reg1 nor reg2.
; Result   : flags of a signed cmp between the adjusted values.
; Clobbers : reg3 and flags; reg1 and reg2 are xor-ed in place, so register
;            operands passed by the caller are modified when both values
;            have their sign bit set.
ufcomp macro var1, var2

;; LOCAL gives each expansion its own names for these text macros
local reg1, reg2, reg3

;; ---- choose reg1 and reg2 ------------------------------------------------
if (opattr var1) and 00010000b ;;Is var1 a register?
reg1 textequ <var1>
if (opattr var2) and 00010000b ;;Is var2 a register?
reg2 textequ <var2>
else
;; var2 is not a register: load it into a register other than reg1
ifdifi reg1, <eax>
reg2 textequ <eax>
elseifdifi reg1, <ecx>
reg2 textequ <ecx>
;; NOTE(review): the edx branch below looks unreachable - when reg1 is eax
;; the ecx test above already succeeds
elseifdifi reg1, <edx>
reg2 textequ <edx>
endif
mov reg2, var2
endif
else
if (opattr var2) and 00010000b ;;Is var2 a register?
reg2 textequ <var2>
;; var1 is not a register: load it into a register other than reg2
ifdifi reg2, <eax>
reg1 textequ <eax>
elseifdifi reg2, <ecx>
reg1 textequ <ecx>
;; NOTE(review): same as above, the edx branch looks unreachable
elseifdifi reg2, <edx>
reg1 textequ <edx>
endif
mov reg1, var1
else
;; neither operand is a register: use eax and ecx.
;; var2 is read after eax has been overwritten.
reg1 textequ <eax>
reg2 textequ <ecx>
mov reg1, var1
mov reg2, var2
endif
endif

;; ---- choose reg3: first of eax, ecx, edx not used as reg1 or reg2 --------
ifdifi reg1, <eax>
ifdifi reg2, <eax>
reg3 textequ <eax>
endif
endif

;; NOTE(review): confirm the assembler accepts the angle-bracketed name in
;; ifndef and treats a textequ symbol as defined
ifndef <reg3>
ifdifi reg1, <ecx>
ifdifi reg2, <ecx>
reg3 textequ <ecx>
endif
endif
endif

ifndef <reg3>
ifdifi reg1, <edx>
ifdifi reg2, <edx>
reg3 textequ <edx>
endif
endif
endif

;; ---- compare: same sequence as the fixed-register macro ------------------
mov reg3, reg1
and reg3, reg2 ;; bit 31 set only when both sign bits are set
sar reg3, 31 ;; reg3 = FFFFFFFFh (both negative) or 0
xor reg1, reg3 ;; invert both values when both are negative
xor reg2, reg3
cmp reg1, reg2 ;; signed compare of the adjusted values
endm


Regards,

Biterider
Posted on 2004-11-30 04:19:49 by Biterider
Yup, I was also thinking about this ^_^
Posted on 2004-11-30 07:01:54 by Ultrano