;-----------------------------------------------------------------------
; CmpStr - compare two memory blocks of Len bytes for equality.
;
; In:    lpAlm, lpAlm2 = addresses of the two blocks
;        Len           = number of bytes to compare
; Out:   eax = 1 if all Len bytes are equal (also when Len is 0),
;        eax = 0 otherwise
; Clobb: ecx, flags; the direction flag is left cleared.
;        esi and edi are saved/restored by the USES clause.
;
; Whole dwords are compared with "repe cmpsd" first, then the remaining
; Len mod 4 bytes with "repe cmpsb". A single "repe cmpsb" over all Len
; bytes would be smaller, but was measured as much slower by the author.
;-----------------------------------------------------------------------
CmpStr proc uses esi edi lpAlm:dword,lpAlm2:dword,Len:dword

        cld                             ; string ops must walk upwards
        xor     eax, eax                ; result defaults to "different"

        mov     edi, lpAlm2
        mov     esi, lpAlm

        mov     ecx, Len
        shr     ecx, 2                  ; ecx = number of whole dwords
        jz      tail_bytes              ; fewer than 4 bytes: bytes only
        repe    cmpsd
        jne     done                    ; a dword differed

tail_bytes:
        mov     ecx, Len
        and     ecx, 3                  ; ecx = leftover byte count (0..3)
        jz      matched                 ; nothing left to compare
        repe    cmpsb
        jne     done                    ; a trailing byte differed

matched:
        inc     eax                     ; eax = 1: blocks are identical
done:
        ret
CmpStr endp



How Low can you go?
Posted on 2003-10-08 08:01:50 by realvampire
What about


;-----------------------------------------------------------------------
; CmpStr - compare two memory blocks of Len bytes for equality.
;
; In:    lpAlm, lpAlm2 = addresses of the two blocks
;        Len           = number of bytes to compare (0 is allowed)
; Out:   CF = 0 if the blocks match, CF = 1 if they differ
; Clobb: eax, ecx, edx, flags (ebx is preserved by USES)
;
; The leading (Len and not 3) bytes are compared one dword at a time,
; walking from the end of that region down to offset 0; the last
; Len mod 4 bytes are then compared one by one. No byte outside
; [0, Len) is ever read.
;
; The third parameter is called Len because LENGTH is a reserved MASM
; operator and cannot be used as an identifier.
;-----------------------------------------------------------------------
CmpStr proc uses ebx lpAlm:dword,lpAlm2:dword,Len:dword
        mov     ecx, lpAlm
        mov     edx, lpAlm2

        mov     ebx, Len
        and     ebx, -4                 ; ebx = bytes covered by whole dwords
        jz      _tail                   ; Len < 4: no dword part
@@:
        mov     eax, [ecx+ebx-4]        ; ebx is the end offset of this dword
        cmp     [edx+ebx-4], eax
        jne     _notmatch
        sub     ebx, 4
        jnz     @B                      ; stop once offset 0 was compared

_tail:
        mov     ebx, Len                ; ebx = end offset of the next byte
@@:
        test    ebx, 3
        jz      _match                  ; reached the dword-covered region
        mov     al, [ecx+ebx-1]
        cmp     [edx+ebx-1], al
        jne     _notmatch
        sub     ebx, 1
        jmp     @B

_match:
        clc                             ; CF = 0: equal
        ret
_notmatch:
        stc                             ; CF = 1: different
        ret
CmpStr endp
Posted on 2003-10-08 09:29:01 by roticv
Whoops..., GPF.
Posted on 2003-10-14 05:01:46 by realvampire
My mistake.

Try


;-----------------------------------------------------------------------
; CmpStr - compare two memory blocks of Len bytes for equality.
;
; In:    lpAlm, lpAlm2 = addresses of the two blocks
;        Len           = number of bytes to compare (any value, incl. 0)
; Out:   CF = 0 if the blocks match, CF = 1 if they differ
; Clobb: eax, ecx, edx, flags (ebx is preserved by USES)
;
; Works backwards from the end: trailing bytes are compared singly
; until the remaining count is a multiple of 4, then the rest is
; compared one dword at a time down to offset 0.
;
; The third parameter is called Len because LENGTH is a reserved MASM
; operator and cannot be used as an identifier.
;-----------------------------------------------------------------------
CmpStr proc uses ebx lpAlm:dword,lpAlm2:dword,Len:dword
        mov     ecx, lpAlm
        mov     edx, lpAlm2
        mov     ebx, Len                ; ebx = bytes still to compare
@@:
        test    ebx, 3
        jz      _dwordaligned
        mov     al, [ecx+ebx-1]         ; last uncompared byte is at ebx-1
        cmp     [edx+ebx-1], al
        jnz     _notmatch
        sub     ebx, 1
        jnz     @B                      ; ebx = 0 falls through and exits below

_dwordaligned:
        shr     ebx, 2                  ; ebx = dwords still to compare
        jz      _match                  ; nothing left
@@:
        mov     eax, [ecx+ebx*4-4]      ; last uncompared dword
        cmp     [edx+ebx*4-4], eax
        jnz     _notmatch
        sub     ebx, 1                  ; ebx counts dwords, not bytes
        jnz     @B

_match:
        clc                             ; CF = 0: equal
        ret
_notmatch:
        stc                             ; CF = 1: different
        ret
CmpStr endp



Just make sure your string is at least 4 bytes in length.
Posted on 2003-10-14 06:47:18 by roticv
; The routine manages the stack entirely by hand, so the assembler must
; not wrap it in "push ebp / mov ebp, esp ... leave / ret": with the
; automatic prologue the first POP would fetch the saved EBP instead of
; the return address.
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE

;-----------------------------------------------------------------------
; strCMP - size-optimised compare of iLength bytes of two strings.
;
; In:    stack (callee-cleaned): iLength, pString1, pString2
; Out:   flags as left by "repe cmpsb" (ZF = 1 when every compared byte
;        matched). With iLength = 0 no compare is executed and the
;        flags are whatever the caller had on entry.
; Clobb: eax, ecx, edx, flags; esi and edi are preserved.
;        Assumes the direction flag is clear on entry.
;-----------------------------------------------------------------------
strCMP PROC iLength:DWORD, pString1:DWORD, pString2:DWORD

; (c) bitRAKE
;[imgmail]http://www.asmcommunity.net/board/cryptmail.php?tauntspiders=in.your.face@nomail.for.you&id=2102c2c79da62f8c2f42a8098ea13d63[/imgmail]
; 14 bytes!
pop edx ; return address
pop ecx ; count
pop eax ; string one
xchg esi, [esp] ; esi = string two; caller's ESI parked in the argument slot
xchg eax, edi ; edi = string one; eax = caller's EDI
push eax ; save EDI
repe cmpsb ; compare up to ecx bytes, stop at first difference
pop edi ; restore caller's EDI
pop esi ; restore caller's ESI (also discards the last argument slot)
jmp edx ; return; all three arguments are already off the stack
strCMP ENDP

OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef
Posted on 2003-10-14 07:13:00 by bitRAKE
This algo is not the shortest but the loop code should be competitively fast. It takes two zero terminated strings as parameters. It is written for the newest version of the MASM32 library to be released soon.


;-----------------------------------------------------------------------
; szCmp - compare two zero terminated strings.
;
; In:    str1, str2 = addresses of the strings
; Out:   eax = length of the strings (terminator excluded) when they
;              are identical,
;        eax = 0 when they differ (two empty strings also give 0)
; Clobb: ecx, edx, flags; esi is saved and restored here.
;-----------------------------------------------------------------------
szCmp proc str1:DWORD, str2:DWORD

        push    esi
        mov     edx, str2
        mov     ecx, str1
        mov     esi, -1                 ; index, bumped before each compare

next_char:
        add     esi, 1
        mov     al, [ecx+esi]
        cmp     al, [edx+esi]
        jne     differ
        test    al, al                  ; both bytes equal: was it the terminator?
        jnz     next_char

        mov     eax, esi                ; index of the terminator = length
        pop     esi
        ret

differ:
        xor     eax, eax                ; zero signals "no match"
        pop     esi
        ret

szCmp endp
Posted on 2003-10-30 02:33:35 by hutch--

strCMP PROC iLength:DWORD, pString1:DWORD, pString2:DWORD

; (c) bitRAKE
;[imgmail]http://www.asmcommunity.net/board/cryptmail.php?tauntspiders=in.your.face@nomail.for.you&id=2102c2c79da62f8c2f42a8098ea13d63[/imgmail]
; 14 bytes!
;
; Size-optimised compare: pops its own return address and arguments,
; runs "repe cmpsb" over the two strings and jumps back to the caller.
; No register is loaded with a result; the flags left by the compare
; are the only output.
;
; NOTE(review): the POP sequence only works if the return address is on
; top of the stack at entry, i.e. no assembler-generated prologue.
; Presumably assembled with OPTION PROLOGUE:NONE / EPILOGUE:NONE - confirm.
pop edx ; return address
pop ecx ; count
pop eax ; string one
xchg esi, [esp] ; string two into esi; caller's ESI stored in its stack slot
xchg eax, edi ; string one into edi; eax now holds the caller's EDI
push eax ; save EDI
repe cmpsb ; compare up to ecx bytes, stopping at the first mismatch
pop edi ; restore caller's EDI
pop esi ; restore caller's ESI from the former argument slot
jmp edx ; return via the popped address; arguments are already removed
strCMP ENDP


My P4 optimization manual says that popping the return address off the stack and jumping to the instruction after the call is a bad idea, because the P4 matches calls with returns to speed up the process, but who cares about the P4 =P ?
Just to let you know.
Posted on 2003-11-01 18:09:24 by x86asm
x86asm, important to note the limitations, but it was coded for size. Additionally, I really don't use this type of function because it is built in to the processor to compare strings:
	mov	esi, pString1		; esi -> first string
	mov	edi, pString2		; edi -> second string
	mov	ecx, iLength		; number of bytes to compare (must be non-zero)
	repe	cmpsb			; ZF = 1 afterwards if all bytes matched
It takes just as many bytes as pushing the parameters and calling the routine. The only reason to do otherwise would be for the purpose of speed. So, my implementation above has no worth whatsoever. :tongue:
Posted on 2003-11-01 19:02:35 by bitRAKE




;-----------------------------------------------------------------------
; szCmp - compare two zero terminated strings.
;
; In:    str1, str2 = addresses of the strings
; Out:   eax = length of the strings (terminator excluded) on a match,
;        eax = 0 when they differ (two empty strings also give 0)
; Clobb: ecx, edx, flags; esi is saved and restored here.
;-----------------------------------------------------------------------
szCmp proc str1:DWORD, str2:DWORD

    ; --------------------------------------
    ; scan zero terminated string for match
    ; --------------------------------------
    push esi
    mov ecx, str1
    mov edx, str2
    xor esi, esi                ; esi = index of the current character
  cmst:
    mov al, [ecx+esi]
    cmp al, [edx+esi]
    jne no_match
    add esi, 1                  ; <== the highlighted line: ADD rewrites all
                                ;     arithmetic flags, INC leaves CF untouched
    test al, al                 ; check for terminator
    jne cmst

    lea eax, [esi-1]            ; esi is one past the terminator here,
                                ; so esi-1 is the length: returned on match
    jmp cmpout

  no_match:
    xor eax, eax                ; return zero on no match

  cmpout:
    pop esi
    ret

szCmp endp


Isn't "inc esi" a faster instruction than "add esi,1"?

Speaking of which, I see a lot of string procs when I do search, but can someone point me to the fastest (not smallest) string procedures?
Posted on 2003-12-01 16:46:53 by Mikky
I don't understand it either. inc esi takes 1 clock (and 1 byte).
Posted on 2003-12-02 05:47:27 by realvampire
You all have yet to read the Intel P4 manual for optimisation?

"The inc and dec instructions modify only a subset of the bits in the flag register. This creates a dependence on all previous writes of the flag register. This is especially problematic when these instructions are on the critical path because they are used to change an address for a load on which many other instructions depend."
Posted on 2003-12-02 06:41:02 by roticv