Just some rdtsc stuff, based on the view that the lowest observed cycle count is the correct one. It seems to be accurate even with a _LOOPCOUNT of just 3. It may not give perfect cycle counts for something like a lone 'xor eax, eax', but the nice thing is that it reports the same cycle count every time, with no variation (for small blocks). So it might be useful when reordering code to see which instructions pair best, etc. For example, it's impressive how much longer 'cdq; div ecx' takes compared to 'xor edx, edx; div ecx'. Suggestions/improvements welcome.

---

.686
.model flat,stdcall
option casemap:none

; MASM32 SDK headers and import libraries.
; The path separators had been stripped from these lines (e.g.
; "masm32includewindows.inc"); they are restored to the SDK's usual
; \masm32\include and \masm32\lib layout so the files can be found.
include \masm32\include\windows.inc
include \masm32\include\kernel32.inc
includelib \masm32\lib\kernel32.lib

include \masm32\include\masm32.inc      ; MASM32 runtime library (FloatToStr2 is invoked by T2)
includelib \masm32\lib\masm32.lib
include \masm32\include\debug.inc       ; debug macros (PrintDword / PrintString are used below)
includelib \masm32\lib\debug.lib

.data

; ---- settings to adjust per run / per machine -------------------------
_LOOPCOUNT      EQU     500                     ; iteration-count setting for the timing loop
_MHZ            DWORD   1400000000              ; CPU clock in Hz despite the name (1.4 GHz here)

; ---- working storage ---------------------------------------------------
_ftemp          DWORD   0                       ; 32-bit scratch cell for integer -> FPU loads
_f32max         QWORD   100000000h              ; the constant 2^32, as a 64-bit integer
_numseconds     QWORD   0                       ; elapsed time in seconds, stored as a 64-bit float
_seconds        BYTE    30 DUP (0)              ; text buffer for the formatted seconds value
_counter        DWORD   0                       ; remaining-iterations counter of the timing loop
_toverhead      DWORD   0                       ; cycle cost of the empty timing loop (set by RDTSC_INIT)
_tcycles        QWORD   0FFFFFFFFFFFFFFFFh      ; smallest cycle count seen so far (starts at max)

.code

;-----------------------------------------------------------------------
; RDTSC_INIT
; Calibrates _toverhead, the cycle cost of an empty C1/_C2 timing loop,
; so that later measurements can have it subtracted.
;
; Must be expanded once, before the first T1/T2 or C1/C2 pair.
; Clobbers: eax, ecx, edx, flags.  ebx is saved around cpuid.
;-----------------------------------------------------------------------
RDTSC_INIT MACRO
    mov     _toverhead, 0               ; calibrate with no correction applied

    ; Warm-up cpuid calls.  eax selects the cpuid leaf, so give it a defined
    ; value, and preserve ebx because cpuid overwrites eax/ebx/ecx/edx.
    push    ebx
    xor     eax, eax
    cpuid
    xor     eax, eax
    cpuid
    xor     eax, eax
    cpuid
    pop     ebx

    ; Time an empty block three times.  Each C1 resets _tcycles, so the
    ; first two rounds act purely as warm-up and only the last one counts.
    C1
    _C2
    C1
    _C2
    C1
    _C2

    ; Take the overhead from the recorded minimum rather than from eax:
    ; the minimum is what the loop keeps in _tcycles, and _toverhead was 0
    ; during calibration so no correction has been applied to it.
    mov     eax, dword ptr [_tcycles]
    mov     _toverhead, eax             ; overhead for loops
ENDM

; RDTSC_START: read the time-stamp counter and park the 64-bit starting
; value on the stack (high dword pushed first, so the low dword ends up on top).
; Overwrites eax and edx, and leaves 8 bytes on the stack that RDTSC_STOP pops,
; so the code between the two macros must leave the stack balanced.
RDTSC_START MACRO
rdtsc
push edx ; starting count, high dword
push eax ; starting count, low dword
ENDM

; RDTSC_STOP: read the time-stamp counter again and subtract the starting
; value that RDTSC_START left on the stack.
; Result: edx:eax = ending count - starting count (64-bit).
; Overwrites ecx and the flags; removes the 8 bytes pushed by RDTSC_START.
RDTSC_STOP MACRO
rdtsc
pop ecx ; low dword of the starting count (it was pushed last)
sub eax,ecx ; subtract starting count
pop ecx ; high dword of the starting count
sbb edx,ecx ; high-dword subtract, taking the borrow from the low half
ENDM

; call before code
;-----------------------------------------------------------------------
; C1
; Opens the timing loop: resets the running minimum in _tcycles, arms the
; iteration counter and takes the starting time stamp.  The code to be
; timed follows this macro and must be closed by _C2 (directly, or through
; C2 / T2).
;
; The loop spans two macros and is linked with anonymous labels: the @@
; below is the loop head that _C2 jumps back to with @B, and the jz @F
; leaves through the @@ inside _C2.  The timed code therefore must not
; define @@ labels of its own, and must leave the stack balanced.
;
; Clobbers: eax, edx (rdtsc), flags; pushes 8 bytes that _C2 pops.
;-----------------------------------------------------------------------
C1 MACRO
    mov     dword ptr [_tcycles], 0ffffffffh        ; running minimum := max value
    mov     dword ptr [_tcycles + 4], 0ffffffffh
    ; The counter is decremented and tested *before* each pass, so seed it
    ; with one extra: the timed code then runs exactly _LOOPCOUNT times
    ; (and zero times, instead of wrapping around, when _LOOPCOUNT is 0).
    mov     _counter, _LOOPCOUNT + 1
@@:
    dec     _counter
    jz      @F                                      ; all passes done -> exit label in _C2
    RDTSC_START
ENDM

;-----------------------------------------------------------------------
; _C2
; Closes the timing loop opened by C1.  Each pass measures the elapsed
; count, keeps it in _tcycles if it is the smallest seen so far, and jumps
; back to the loop head in C1.  When C1 leaves the loop, execution resumes
; at the @@ below.
;
; Out:   edx:eax = smallest elapsed count minus _toverhead, clamped at 0
; Clobbers: ecx, flags
;-----------------------------------------------------------------------
_C2 MACRO
    RDTSC_STOP                                  ; edx:eax = elapsed count of this pass

    ; 64-bit unsigned compare of edx:eax against _tcycles.  Only the carry
    ; of the sbb is wanted, so edx is saved and restored around it (pop
    ; leaves the flags untouched).
    cmp     eax, dword ptr [_tcycles]
    push    edx
    sbb     edx, dword ptr [_tcycles + 4]
    pop     edx
    jae     @B                                  ; not a new minimum -> next pass
    mov     dword ptr [_tcycles], eax           ; new minimum
    mov     dword ptr [_tcycles + 4], edx
    jmp     @B
@@:
    ; Loop finished.  edx:eax still holds the *last* sample here, so reload
    ; the recorded minimum before correcting for the loop overhead.
    mov     eax, dword ptr [_tcycles]
    mov     edx, dword ptr [_tcycles + 4]
    sub     eax, _toverhead
    sbb     edx, 0                              ; carry the borrow into the high dword

    ; If the overhead exceeded the measurement (CF set), report 0 rather
    ; than a wrapped-around huge count.
    sbb     ecx, ecx                            ; ecx = 0FFFFFFFFh on underflow, else 0
    not     ecx                                 ; ecx = 0 on underflow, else all ones
    and     eax, ecx
    and     edx, ecx
ENDM

; call after code
; C2: closes the timing loop with _C2, then prints the cycle count that
; _C2 left in edx:eax, high dword first and low dword second.
; NOTE(review): PrintDword is not defined in this file; it presumably comes
; from the debug include. T2 reads edx and eax again after this macro, so it
; relies on PrintDword leaving both registers unchanged - confirm.
C2 MACRO
_C2
PrintDword edx
PrintDword eax
ENDM

; T1: opens a timed region that is closed by T2. It expands to C1 and adds
; nothing of its own; place it immediately before the code to be timed.
T1 MACRO
C1
ENDM

;-----------------------------------------------------------------------
; T2
; Closes a region opened with T1.  Prints the cycle count (through C2),
; then converts it to seconds as cycles / _MHZ and prints that as text.
; Note that _MHZ holds the clock rate in Hz.
;
; Both integers are loaded into the FPU as 64-bit values built on the
; stack.  A 32-bit fild is signed, so loading the low dword of the count
; (or a clock rate of 2^31 Hz or more) that way would turn it negative.
;
; Clobbers: eax, ecx, edx, flags, the FPU stack top, _numseconds and
; _seconds.  Assumes edx:eax still holds the count after C2's printing.
;-----------------------------------------------------------------------
T2 MACRO
    C2                                      ; edx:eax = cycle count (also printed)

    push    edx                             ; build the 64-bit count on the stack
    push    eax
    fild    qword ptr [esp]                 ; st(0) = cycles

    mov     eax, _MHZ
    mov     dword ptr [esp], eax            ; reuse the slot: clock rate, zero-extended
    mov     dword ptr [esp + 4], 0
    fild    qword ptr [esp]                 ; st(0) = Hz, st(1) = cycles
    add     esp, 8                          ; release the temporary slot

    fdiv                                    ; st(0) = cycles / Hz = seconds
    fstp    _numseconds
    invoke  FloatToStr2, _numseconds, addr _seconds
    PrintString _seconds
ENDM

;-----------------------------------------------------------------------
; Program entry point.
; Calibrates the timing overhead, times one sample code block and prints
; the result, then terminates the process.
;-----------------------------------------------------------------------
start:

    RDTSC_INIT                      ; must run once before any T1/T2 pair

    ; ---- code under test: put the block to be measured between T1 and T2 ----
    T1
    mov     eax, 1000
    cdq                             ; edx:eax = sign-extended dividend for div
    mov     ecx, 10
    div     ecx
    T2

    ; Terminate explicitly instead of returning from the entry point, which
    ; would depend on whatever the loader placed on the stack as return address.
    invoke  ExitProcess, 0

end start
Posted on 2004-08-05 02:41:49 by grv575