This is again about converting a 64-bit integer to a decimal ASCIIZ string.
In these two versions you can see MMX instructions and a calculation
based on bsr.
They run faster (the last one is the fastest, though also the biggest), but they are mostly designed for newer processors such as the PIII, because bsr is implemented differently there and runs much faster than on older-architecture processors (~3 clocks).

For those who really want to become programmers, I recommend looking at the second proc - it contains some new logic that you can use in different tasks.
I don't want to spoil the surprise :)
So you should figure out the logic yourself - I promise this homework is worth it.

64mmx:


.586
.MMX
.model flat,stdcall
option casemap:none
include C:\masm32\include\windows.inc
include C:\masm32\include\user32.inc
include C:\masm32\include\kernel32.inc
includelib kernel32.lib
includelib user32.lib
.data
; Per-byte constants used by QtoA when it expands packed BCD into ASCII.
mmxb0F  QWORD   0F0F0F0F0F0F0F0Fh       ; keeps the low nibble of every byte
mmxb30  QWORD   3030303030303030h       ; '0' in every byte (binary digit -> ASCII)
; Sample value converted by the demo entry point.
num     QWORD   12345678901234567890
.data?
; Destination for the ASCIIZ result: QtoA writes at most 20 digits plus
; the terminating zero.
buffer  BYTE    24 dup (?)
.code
; Program entry point: converts the 64-bit value in num to decimal text
; and shows it in a message box.
start:
        mov     edx, dword ptr num+4            ; edx = high dword of the value
        mov     eax, dword ptr num              ; eax = low dword of the value
        mov     edi, offset buffer              ; edi -> output buffer for QtoA
        call    QtoA
        invoke  MessageBox, 0, offset buffer, 0, 0
        ; ExitProcess is stdcall with one DWORD argument; pass an explicit
        ; exit code instead of leaving whatever happens to be on the stack.
        invoke  ExitProcess, 0
;-----------------------------------------------------------------------
; Range constants, split into high/low dwords.
;   N19H:N19L = 0DE0B6B3A7640000h = 10^18 (smallest 19-digit number)
;   N20H:N20L = 8AC7230489E80000h = 10^19 (smallest 20-digit number)
;-----------------------------------------------------------------------
N19H    EQU     00DE0B6B3H
N19L    EQU     0A7640000H
N20H    EQU     08AC72304H
N20L    EQU     089E80000H

;-----------------------------------------------------------------------
; QtoA - unsigned 64-bit integer -> decimal ASCIIZ string
;
; In:     edx:eax = value (edx = high dword)
;         edi     -> destination buffer
; Out:    zero-terminated decimal string at the original edi
; Clobb:  eax, ecx, edx, flags, mm0-mm2, mm4-mm7 (emms is executed before
;         the digits are copied); edi is advanced by 4 for every group of
;         digits written (left unchanged for the value 0)
; Stack:  20 bytes of scratch, released before returning
; Needs:  the qword constants mmxb0F and mmxb30
;
; The digits are stored four at a time, so up to 3 bytes after the
; terminating zero are overwritten as well; 21 bytes of buffer are always
; enough. The load for the last group may start up to 3 bytes below esp;
; those bytes end up after the terminator.
;-----------------------------------------------------------------------
QtoA:
        sub     esp, 12                         ; with the 8 bytes pushed below: 20 bytes of scratch
        xor     ecx, ecx                        ; ecx = BCD of the digits above 10^18 (none so far)

        ; fbstp handles 18 digits only, so bring the value below 10^18 first.
        cmp     edx, N19H
        jb      QtoA_pack
        jne     QtoA_cmp1e19
        cmp     eax, N19L
        jb      QtoA_pack                       ; value < 10^18: nothing to strip
QtoA_cmp1e19:
        cmp     edx, N20H
        jb      QtoA_sub1e18
        jne     QtoA_sub1e19
        cmp     eax, N20L
        jb      QtoA_sub1e18                    ; 10^18 <= value < 10^19
QtoA_sub1e19:
        mov     cl, 0Fh                         ; the inc below turns this into 10h (BCD "1x")
        sub     eax, N20L
        sbb     edx, N20H                       ; value -= 10^19
QtoA_count:
        inc     ecx                             ; one more 10^18 taken out
QtoA_sub1e18:
        sub     eax, N19L
        sbb     edx, N19H
        jae     QtoA_count                      ; no borrow: the subtraction counts
        add     eax, N19L
        adc     edx, N19H                       ; undo the subtraction that borrowed

QtoA_pack:
        ; Low 18 digits -> packed BCD at [esp..esp+8] (two digits per byte,
        ; least significant first).
        push    edx
        push    eax
        fild    qword ptr [esp]
        fbstp   tbyte ptr [esp]
        mov     dword ptr [esp+9], ecx          ; digits 19/20 replace the sign byte; bytes 10..12 = 0

        ; edx = index (2, 1 or 0) of the highest non-zero dword of the BCD image.
        mov     edx, 2
        cmp     dword ptr [esp+8], 0
        jne     QtoA_top
        cmp     dword ptr [esp+4], 1            ; CF = 1 when this dword is zero
        sbb     edx, 1                          ; edx = 1, or 0 when CF was set
QtoA_top:
        bsr     ecx, dword ptr [esp+edx*4]      ; ZF = 1 when the dword is zero
        je      QtoA_zero                       ; whole value is zero
        shr     ecx, 3                          ; bit index -> byte index inside the dword
        lea     eax, [ecx+4*edx]                ; eax = index of the highest non-zero BCD byte
        lea     edx, [eax+eax]                  ; two digits per byte
        cmp     byte ptr [esp+eax], 10h         ; CF = 1 when the high nibble is empty
        sbb     edx, -1                         ; edx = index of the most significant digit

        ; Expand the packed BCD into 20 ASCII digits at [esp..esp+19],
        ; least significant digit first.
        movq    mm(7), mmxb0F
        movq    mm(6), mmxb30
        movq    mm(0), [esp]
        movq    mm(4), [esp+8]
        movq    mm(1), mm(0)
        psrlq   mm(1), 4                        ; high nibbles move to the low nibble position
        pand    mm(0), mm(7)
        por     mm(0), mm(6)                    ; mm0 = ASCII of the low nibbles
        pand    mm(1), mm(7)
        por     mm(1), mm(6)                    ; mm1 = ASCII of the high nibbles
        movq    mm(2), mm(0)
        punpcklbw mm(0), mm(1)                  ; interleave low/high: digits 0..7
        movq    [esp], mm(0)
        punpckhbw mm(2), mm(1)                  ; digits 8..15
        movq    [esp+8], mm(2)
        movq    mm(5), mm(4)
        psrlq   mm(5), 4
        pand    mm(4), mm(7)
        por     mm(4), mm(6)
        pand    mm(5), mm(7)
        por     mm(5), mm(6)
        punpcklbw mm(4), mm(5)
        movd    [esp+16], mm(4)                 ; digits 16..19
        emms

        ; Copy to the caller's buffer, four digits per step, most
        ; significant digit first.
QtoA_emit:
        mov     eax, [esp+edx-3]                ; the 4 digits ending at index edx
        bswap   eax                             ; reverse them into reading order
        mov     [edi], eax
        add     edi, 4
        sub     edx, 4
        jns     QtoA_emit
        mov     byte ptr [edi+edx+1], 0         ; edx is -4..-1 here: terminator right after the last digit
        add     esp, 20
        ret

QtoA_zero:
        mov     dword ptr [edi],'0'
        add     esp, 20
        ret

end start
;64mmx2

.586
.MMX
.model flat,stdcall
option casemap:none
include C:\masm32\include\windows.inc
include C:\masm32\include\user32.inc
include C:\masm32\include\kernel32.inc
includelib kernel32.lib
includelib user32.lib
.data
; Per-byte constants used by QtoA when it expands packed BCD into ASCII.
mmxb0F  QWORD   0F0F0F0F0F0F0F0Fh       ; keeps the low nibble of every byte
mmxb30  QWORD   3030303030303030h       ; '0' in every byte (binary digit -> ASCII)
; Sample value converted by the demo entry point.
num     QWORD   12345678901234567890
.data?
; Destination for the ASCIIZ result: QtoA writes at most 20 digits plus
; the terminating zero.
buffer  BYTE    24 dup (?)
.code
; Program entry point: converts the 64-bit value in num to decimal text
; and shows it in a message box.
start:
        mov     edx, dword ptr num+4            ; edx = high dword of the value
        mov     eax, dword ptr num              ; eax = low dword of the value
        mov     edi, offset buffer              ; edi -> output buffer for QtoA
        call    QtoA
        invoke  MessageBox, 0, offset buffer, 0, 0
        ; ExitProcess is stdcall with one DWORD argument; pass an explicit
        ; exit code instead of leaving whatever happens to be on the stack.
        invoke  ExitProcess, 0
;N19H EQU 00DE0B6B3H
;N19L EQU 0A7640000H
;N20H EQU 08AC72304H
;N20L EQU 089E80000H

;-----------------------------------------------------------------------
; Range constants, split into high/low dwords.
;   D05H:D05L = 4563918244F40000h = 5 * 10^18
;   D04H:D04L = 3782DACE9D900000h = 4 * 10^18
;   D01H:D01L = 0DE0B6B3A7640000h = 1 * 10^18
;-----------------------------------------------------------------------
D05H    equ     045639182h
D05L    equ     044F40000h
D04H    equ     03782DACEh
D04L    equ     09D900000h
D01H    equ     00DE0B6B3h
D01L    equ     0A7640000h

;-----------------------------------------------------------------------
; QtoA - unsigned 64-bit integer -> decimal ASCIIZ string
;
; In:     edx:eax = value (edx = high dword)
;         edi     -> destination buffer
; Out:    zero-terminated decimal string at the original edi
; Clobb:  eax, ecx, edx, flags, mm0-mm2, mm4-mm7 (emms is executed before
;         the digits are copied); edi is advanced by 4 for every group of
;         digits written (left unchanged for the value 0)
; Stack:  20 bytes of scratch, released before returning
; Needs:  the qword constants mmxb0F and mmxb30
;
; The digits are stored four at a time, so up to 3 bytes after the
; terminating zero are overwritten as well; 21 bytes of buffer are always
; enough. The load for the last group may start up to 3 bytes below esp;
; those bytes end up after the terminator.
;-----------------------------------------------------------------------
QtoA:
        sub     esp, 12                         ; with the 8 bytes pushed below: 20 bytes of scratch
        xor     ecx, ecx                        ; ecx = BCD of the digits above 10^18 (none so far)

        ; fbstp handles 18 digits only. Strip the multiples of 10^18 by
        ; first jumping in steps of 5 * 10^18, so the counting loop below
        ; never needs more than a few rounds.
        sub     eax, D01L
        sbb     edx, D01H                       ; edx:eax = value - 1*10^18
        jb      QtoA_restore                    ; value < 10^18: just undo
        sub     eax, D04L
        sbb     edx, D04H                       ; edx:eax = value - 5*10^18
        jb      QtoA_add5                       ; value < 5*10^18: undo, count from 0
        mov     cl, 05h
        sub     eax, D05L
        sbb     edx, D05H                       ; edx:eax = value - 10*10^18
        jb      QtoA_add5                       ; value < 10^19: back to value - 5*10^18, count from 5
        mov     cl, 15h
        sub     eax, D05L
        sbb     edx, D05H                       ; edx:eax = value - 15*10^18
        jae     QtoA_count                      ; value >= 15*10^18: count from 15h (BCD)
        mov     cl, 10h                         ; otherwise count from 10h (BCD) ...
QtoA_add5:
        add     eax, D05L
        adc     edx, D05H                       ; ... after undoing the last 5*10^18 step
QtoA_count:
        inc     ecx
        sub     eax, D01L
        sbb     edx, D01H
        jae     QtoA_count                      ; repeat until the subtraction borrows
        dec     ecx                             ; the borrowing round does not count
QtoA_restore:
        add     eax, D01L
        adc     edx, D01H                       ; undo the subtraction that borrowed

        ; Low 18 digits -> packed BCD at [esp..esp+8] (two digits per byte,
        ; least significant first).
        push    edx
        push    eax
        fild    qword ptr [esp]
        fbstp   tbyte ptr [esp]
        mov     dword ptr [esp+9], ecx          ; digits 19/20 replace the sign byte; bytes 10..12 = 0

        ; edx = index (2, 1 or 0) of the highest non-zero dword of the BCD image.
        mov     edx, 2
        cmp     dword ptr [esp+8], 0
        jne     QtoA_top
        cmp     dword ptr [esp+4], 1            ; CF = 1 when this dword is zero
        sbb     edx, 1                          ; edx = 1, or 0 when CF was set
QtoA_top:
        bsr     ecx, dword ptr [esp+edx*4]      ; ZF = 1 when the dword is zero
        je      QtoA_zero                       ; whole value is zero
        shr     ecx, 3                          ; bit index -> byte index inside the dword
        lea     eax, [ecx+4*edx]                ; eax = index of the highest non-zero BCD byte
        lea     edx, [eax+eax]                  ; two digits per byte
        cmp     byte ptr [esp+eax], 10h         ; CF = 1 when the high nibble is empty
        sbb     edx, -1                         ; edx = index of the most significant digit

        ; Expand the packed BCD into 20 ASCII digits at [esp..esp+19],
        ; least significant digit first.
        movq    mm(7), mmxb0F
        movq    mm(6), mmxb30
        movq    mm(0), [esp]
        movq    mm(4), [esp+8]
        movq    mm(1), mm(0)
        psrlq   mm(1), 4                        ; high nibbles move to the low nibble position
        pand    mm(0), mm(7)
        por     mm(0), mm(6)                    ; mm0 = ASCII of the low nibbles
        pand    mm(1), mm(7)
        por     mm(1), mm(6)                    ; mm1 = ASCII of the high nibbles
        movq    mm(2), mm(0)
        punpcklbw mm(0), mm(1)                  ; interleave low/high: digits 0..7
        movq    [esp], mm(0)
        punpckhbw mm(2), mm(1)                  ; digits 8..15
        movq    [esp+8], mm(2)
        movq    mm(5), mm(4)
        psrlq   mm(5), 4
        pand    mm(4), mm(7)
        por     mm(4), mm(6)
        pand    mm(5), mm(7)
        por     mm(5), mm(6)
        punpcklbw mm(4), mm(5)
        movd    [esp+16], mm(4)                 ; digits 16..19
        emms

        ; Copy to the caller's buffer, four digits per step, most
        ; significant digit first.
QtoA_emit:
        mov     eax, [esp+edx-3]                ; the 4 digits ending at index edx
        bswap   eax                             ; reverse them into reading order
        mov     [edi], eax
        add     edi, 4
        sub     edx, 4
        jns     QtoA_emit
        mov     byte ptr [edi+edx+1], 0         ; edx is -4..-1 here: terminator right after the last digit
        add     esp, 20
        ret

QtoA_zero:
        mov     dword ptr [edi],'0'
        add     esp, 20
        ret

end start
Posted on 2001-09-22 11:41:18 by The Svin