;PIII ROUTINES : STRLEN (easily adaptable for finding a byte/aligned word in a string)
;MASM 6.15.8803 & LINK 5.12 & MASM32 7

;weaker on short strings (<16) :( and 28% faster than Jens's on long ones :)
;Author Valery CLAUDEPIERRE, 2002

;Thanks, guys, for Masm32,Radasm,Profile and Debug libs... great tools :alright:
;A sampling profiler like the good one in Symantec's C/C++ 11.0c would make this a little handier (though that compiler's asm inlining is awful).


;#############################################################################

; Assembler setup for the whole file.
; .686 selects the Pentium Pro instruction set; .mmx and .xmm then enable the
; MMX and SSE mnemonics (Valy uses movq/pxor/pcmpeqb and the SSE-era pmovmskb).
.686
.mmx
.xmm
; Flat 32-bit memory model, stdcall as the default calling convention.
.model flat,stdcall
option casemap :none ; case sensitive


include c:\masm32\include\profile.inc
include c:\masm32\include\windows.inc
include c:\masm32\include\kernel32.inc
include c:\masm32\include\user32.inc
include c:\masm32\include\masm32.inc
include c:\masm32\include\debug.inc

includelib c:\masm32\lib\masm32.lib
includelib c:\masm32\lib\debug.lib
includelib c:\masm32\lib\kernel32.lib
includelib c:\masm32\lib\user32.lib

;#############################################################################

.data
; Benchmark input shared by Jens and Valy.
; 100 qwords (800 bytes) containing no zero byte, followed by the qword
; 010203040500fe74h, stored little-endian as 74h,0FEh,00h,05h,... so the first
; zero byte is at offset 802 from lpString. A final qword of 1 pads the buffer.
; The align directive is placed after .data so that it pads the data segment
; itself and lpString starts on a 16-byte boundary for the dword/qword loads.
align 16
lpString dq 100 dup(0102030405060708h),010203040500fe74h,1 ;expected result 802
.code

;#############################################################################

;-----------------------------------------------------------------------------
; Jens - length of the fixed test string lpString, scanned one dword at a time
;        using integer instructions only.
; In:    nothing (the buffer address is hard-coded to lpString)
; Out:   eax = offset of the first zero byte, counted from lpString
; Clobb: ecx, edx, flags
; Note:  the instruction sequence is kept exactly as benchmarked.
;-----------------------------------------------------------------------------
JENS_BYTE_ONES  equ 01010101h           ; 1 in every byte lane
JENS_BYTE_TOPS  equ 80808080h           ; bit 7 of every byte lane

        align   8
Jens:
        mov     ecx, offset lpString            ; ecx = read cursor

JensScan:
        mov     eax, dword ptr [ecx]            ; eax = next four bytes
        add     ecx, 4                          ; cursor now points past them

        ; (x xor e) and e == (not x) and e, with e = x - 01010101h, so after
        ; the three logic ops eax = (not x) and (x - 01010101h) and 80808080h.
        ; The lowest set bit, if any, is bit 7 of the first zero byte.
        lea     edx, [eax - JENS_BYTE_ONES]
        xor     eax, edx
        and     eax, JENS_BYTE_TOPS
        and     eax, edx
        jz      JensScan                        ; no zero byte in this dword

        ; bsf yields 7, 15, 23 or 31; (bit - 4) >> 3 maps that to byte 0..3.
        bsf     edx, eax
        sub     edx, 4
        shr     edx, 3

        lea     eax, [ecx + edx - 4]            ; address of the zero byte
        sub     eax, offset lpString            ; -> length

        ret


;-----------------------------------------------------------------------------
; Valy - length of the fixed test string lpString, scanned one qword at a time
;        with MMX compares (pmovmskb needs a PIII-class CPU).
; In:    nothing (the buffer address is hard-coded to lpString)
; Out:   eax = offset of the first zero byte, counted from lpString
; Clobb: ecx, mm0, mm1, flags; the MMX state is emptied (emms) before return
;-----------------------------------------------------------------------------
        align   8
Valy:
        mov     ecx, offset lpString            ; ecx = read cursor
        pxor    mm1, mm1                        ; mm1 = eight zero bytes to compare against

ValyScan:
        movq    mm0, qword ptr [ecx]            ; next eight bytes
        pcmpeqb mm0, mm1                        ; 0FFh in every lane that held a zero byte
        pmovmskb eax, mm0                       ; one bit per lane, bit 0 = lowest address
        add     ecx, 8                          ; cursor now points past this qword
        or      eax, eax
        jz      ValyScan                        ; no zero byte in this qword

        bsf     eax, eax                        ; index (0..7) of the first zero byte
        ; length = (ecx - 8 + index) - lpString, folded into a sub and an add
        sub     eax, offset lpString+8
        add     eax, ecx

        ; Leave the MMX state before returning: without emms the x87 tag word
        ; stays marked in-use and any later FPU instruction in the caller
        ; would misbehave. emms does not touch eax or the flags.
        emms

        ret


;-----------------------------------------------------------------------------
; start - program entry point (named by "end start").
; Runs each routine through the PROFILE macro and prints two dwords per
; routine: PROFILECYCLES, then eax.
; NOTE(review): PROFILE, PROFILECYCLES and PrintDword are defined outside this
; file (presumably profile.inc / debug.inc); eax is presumably still the
; routine's return value, expected to be 802 - confirm against the macros.
;-----------------------------------------------------------------------------
start:

        ; --- integer dword scan ---
        PROFILE     Jens
        PrintDword  PROFILECYCLES
        ;PrintDword PROFILECYCLES+4
        PrintDword  eax

        ; --- MMX qword scan ---
        PROFILE     Valy
        PrintDword  PROFILECYCLES
        ;PrintDword PROFILECYCLES+4
        PrintDword  eax

        ret

end start


I just added the formatting so the algo was easier to read.
Posted on 2002-09-05 07:25:57 by valy
hi!

For sure MMX, SSE, 3dnow & Co. can do it much faster than "normal" x86 instructions.
But not everyone has the latest processors. (Like me, for example: I still use a PII :tongue: )

btw. Why don't you compare it to the MMX code ? I think that would be more reasonable. :)

Cu, Jens
----
http://www.emucheater.com
http://cyberpad.psxemu.com
Posted on 2002-09-07 14:12:27 by Jens Duttke
In fact I had not read the whole thread :grin:

And I just tested Nexo's version for long strings : it is 10% better, at least.

So forget my routine :grin:

Bye
Posted on 2002-09-12 06:52:27 by valy