Alex,

I had some time today to have a further play with the InString algo and one mod I tried was to start the comparison on the next byte but every version I tried fails under the condition of searching for a single byte that is at the end of the string.

src db "This is a test of a string algorithm",0
pat db "m",0

It is basically a good idea but this algo was designed particularly to handle anything that could be typed in and searching for a single byte is not uncommon with things like text search in zero terminated strings.



I tried my version with your test src and pat it correctly returned
24h
here is what I tried your test with:


.386
.model flat, stdcall ; 32 bit memory model
option casemap :none ; case sensitive

StrLen PROTO :DWORD

.code

InString proc startpos:DWORD,lpSource:DWORD,lpPattern:DWORD
; ------------------------------------------------------------------
; InString searches for a substring in a larger string and if it is
; found, it returns its position in eax. ;
; It uses a one (1) based character index (1st character is 1,
; 2nd is 2 etc...) for both the "StartPos" parameter and the returned
; character position. ; ; Return Values.
; If the function succeeds, it returns the 1 based index of the start
; of the substring. ; 0 = no match found
; -1 = substring same length or longer than main string
; -2 = "StartPos" parameter out of range (less than 1 or longer than
; main string)
; ------------------------------------------------------------------
LOCAL pLen:DWORD
push ebx
push esi
push edi

mov esi, lpSource
mov edi, lpPattern

invoke StrLen,esi
mov ebx, eax ; ebx = source length
invoke StrLen,edi
mov ecx,startpos ;ecx = startpos
add edi,eax ;edi=end of pattern
neg eax ;edi+eax = start of pattern
mov pLen, eax ; - (pattern length)
dec ecx ;ecx = startpos -1
js @errm2 ;startpos <= 0? yes - goto error -2
add ebx,eax ;ebx= sLen - pLen
js @errm1 ; if sLen < pLen goto error -1
lea esi,[esi][ebx][1] ; esi= address of part wich is < pLen
not ebx ;ebx = -(ebx+1)= - ((sLen - pLen)+1)
add ecx,ebx ; ecx = startpos -1 ebx = - ((sLen - pLen)+1) ;if ecx >= (sLen - pLen)+1 then
jns @errm2 ;ecx - ((sLen-pLen)+1) >= 0 and SF =0
mov al,[edi][eax] ;first byte
jmp Loop_Start
;@@@@@@@@@

Pre_Loop:
pop ecx ;restore ECX
inc ecx ;start on next byte
Loop_Start:
cmp al,[esi+ecx]
je Pre_Sub
inc ecx
jnz Loop_Start
xor eax,eax
jmp isOut
Pre_Sub:
push ecx ;preserve ECX
mov edx,pLen
Sub_Loop:
inc ecx
inc edx ;don't check fist byte
mov ah,[esi+ecx]
je found ;edi+0 = pattern end
cmp ah,[edi][edx]
je Sub_Loop
jmp Pre_Loop

; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
found:
pop eax
sub eax, ebx ;sub neg value = add positive
inc eax

isOut:
pop edi
pop esi
pop ebx
ret
@errm2:
mov eax,-2
jmp isOut
@errm1:
mov eax,-1
jmp isOut
InString endp
end
Posted on 2002-04-14 01:20:36 by The Svin
OK,

I have digested eko's optimisation suggestion witjh the loop code and I have it testing reliably. I did not use the short circuit after the match loop as it involved an additional jump where the INC ECX fall through was more efficient.

I think this is near optimum loop code and it now needs Alex's conditional testing code to get the algorithm size down and a shorter path for the conditional code.

What I am tempted to do is write an auxilary scan loop that handles single characters and modify the following one so that it starts on the next byte when more than one character is being searched for.

Here is the current version below.



; #########################################################################

InStringx proc startpos:DWORD,lpSource:DWORD,lpPattern:DWORD

; ------------------------------------------------------------------
; InString searches for a substring in a larger string and if it is
; found, it returns its position in eax.
;
; It uses a one (1) based character index (1st character is 1,
; 2nd is 2 etc...) for both the "StartPos" parameter and the returned
; character position.
;
; Return Values.
; If the function succeeds, it returns the 1 based index of the start
; of the substring.
; 0 = no match found
; -1 = substring same length or longer than main string
; -2 = "StartPos" parameter out of range (less than 1 or longer than
; main string)
; ------------------------------------------------------------------

LOCAL sLen:DWORD
LOCAL pLen:DWORD

push ebx
push esi
push edi

invoke StrLen,lpSource
mov sLen, eax ; source length
invoke StrLen,lpPattern
mov pLen, eax ; pattern length

cmp startpos, 1
jge @F
mov eax, -2
jmp isOut ; exit if startpos not 1 or greater
@@:

dec startpos ; correct from 1 to 0 based index

cmp eax, sLen
jl @F
mov eax, -1
jmp isOut ; exit if pattern longer than source
@@:

sub sLen, eax ; don't read past string end
inc sLen

mov ecx, sLen
cmp ecx, startpos
jg @F
mov eax, -2
jmp isOut ; exit if startpos is past end
@@:
; ----------------
; setup loop code
; ----------------
mov esi, lpSource
mov edi, lpPattern
mov al, [edi] ; get 1st char in pattern
add edi, pLen ; set EDI to value for match loop

add esi, ecx
neg ecx ; invert sign
add ecx, startpos ; add starting offset

jmp Scan_Loop

; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

Pre_Scan:
inc ecx ; start on next byte

Scan_Loop:
cmp al, [esi+ecx]
je Pre_Match
inc ecx
jnz Scan_Loop

jmp No_Match

Pre_Match:
lea ebx, [esi+ecx]
mov edx, pLen ; put pattern length into EDX
add ebx, edx ; add pattern length to scan location
neg edx ; invert sign for index

Test_Match:
mov ah, [ebx+edx]
cmp ah, [edi+edx]
jne Pre_Scan ; jump back on mismatch
inc edx
jnz Test_Match

; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

add ecx, sLen
mov eax, ecx ; match
inc eax
jmp isOut

No_Match:
xor eax, eax

isOut:

pop edi
pop esi
pop ebx

ret

InStringx endp

; ########################################################################


Regards,

hutch@movsd.com
Posted on 2002-04-14 02:52:21 by hutch--
ok .. here is a working version ( i had some stupid mistakes in the first)



InString1 proc startpos:DWORD,lpSource:DWORD,lpPattern:DWORD

push esi
push edi

cmp startpos, 1

jge @F
mov eax, -2
jmp isOut ; exit if startpos not 1 or greater
@@:

dec startpos ; correct from 1 to 0 based index

invoke StrLen,lpPattern
push eax ; pattern length ;because of strlen

invoke StrLen,lpSource
; mov sLen, eax ; source length

pop edx



cmp edx, eax
jl @F
mov eax, -1
jmp isOut ; exit if pattern longer than source
@@:



sub eax,edx
inc eax

cmp eax, startpos
jg @F
mov eax, -2
jmp isOut ; exit if startpos is past end
@@:

mov sLen, eax
; ----------------
; setup loop code
; ----------------
mov esi, lpSource
mov edi, lpPattern
mov cl, [edi] ; get 1st char in pattern


; add edi, edx ; set EDI to value for match loop

add esi, eax
neg eax ; invert sign
add eax, startpos ; add starting offset

;jmp Scan_Loop
;instead of the jump do

push edx

; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@


Scan_Loop:
cmp cl, [esi+eax]
je Pre_Match
Pre_Scan:
inc eax
jnz Scan_Loop
; the loop set eax to set if not found

jmp @loopout


Pre_Match:

mov edx,[esp] ; get edx back instead pop and push
lea ebx,[esi+eax]
Test_Match:
mov ch, [edi+edx-1]
cmp ch, [ebx+edx-1]
jne Pre_Scan ; jump back on mismatch
dec edx
jnz Test_Match

add eax,sLen
inc eax


@loopout:
add esp,4 ; instead of this . make local var and save edx there
isOut:
pop edi
pop esi
pop ebx



ret
InString endp



bye

eko


EDIT:


another idea
because in the test_match the algo scan from the end to the begining . in every non match i can add to eax the len of the pattern



sub eax,edx ; you can do jmp Scan_Loop , but i personly, dont like to jump
push edx ;put push edx here , you wont get stall from the add

add_len: ;in the test_match change the jmp from pre_scan to add_len

add eax,

Scan_Loop:
cmp cl,
je Pre_Match
inc eax
jnz Scan_Loop
Posted on 2002-04-16 06:07:52 by eko
ypu must also take care of the strlen function called I have coded a very fast one:

;###########################################################################
; This procedure find the string length without the null termenator
; On Entry: address of the string you want to find its length(must be null terminated)
; On Return: returns the length of the string in eax
;###########################################################################
strlen PROC uses ecx srcStr:DWORD

xor eax,eax
mov ecx, srcStr
loopit:
or BYTE PTR ,0
jz endit
inc eax
inc ecx
jmp loopit
endit: ret
strlen ENDP

try it
Posted on 2002-04-16 08:24:41 by amr
I don't want to start another strlen topic (very popular one :grin: ) but yours is a byte scanner which can be slow on large strings...
Advanced algos has been explained on some topics of this board section, but I propose my own "strlen byte scan" version which is for most applications fast enough since many others have an overhead before they go at their optimal speed (on "little" strings, a byte scan can be faster).

It has been quickly coded (for size), there's few seconds :-)



RDS_strlen proc
mov eax, offset [Hello -1]
@@:
inc eax
cmp byte ptr [eax], 0
jne @B
sub eax, offset Hello
ret
RDS_strlen endp


Its main advantage is to only destroy eax, and it doesn't matter because it is the value returning register.

Yours is 136 cycles long on my Athlon Thunderbird 1200 MHz on a 39 characters long string.
Mine is 89 cycles on the same machine and string.
Posted on 2002-04-16 12:15:53 by JCP
I have played with the later version that Eko posted and again the loop code design is very good. It is a different algo to the earlier versions posted in that it does the match loop in reverse but my own testing over time shows that a forward or reverse match loop does not effect the times on normal text data to any significant degree.

The snippet I have posted below is the basic loop code design that Eko has optimised with a few suggestions. The DEC EDX is done before the loop code to reduce the complexity of the effective address calculation in the match loop. This allows the following,
mov ch,
against
mov ch,

Effective address calculations take time and the less complex they are, the faster they are under some circumstances.

The other mod is to place,
Pre_Scan1:
inc eax
before the scan loop so that a simple fall through occurs rather than execute the extra jump.

As before, Compliments to Eko on some good optimisation technique that should show in the recovery speed of a mismatch when scanning longer strings that have a large number of partial matches.

Regards,

hutch@movsd.com



; --- snip ----

dec edx
push edx
jmp Scan_Loop1

; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

Pre_Scan1:
inc eax

Scan_Loop1:
cmp cl, [esi+eax]
je Pre_Match1
inc eax
jnz Scan_Loop1

jmp @loopout

Pre_Match1:
mov edx,[esp] ; get edx back instead pop and push
lea ebx,[esi+eax]

Test_Match1:
mov ch, [edi+edx]
cmp ch, [ebx+edx]
jne Pre_Scan1 ; jump back on mismatch
dec edx
jnz Test_Match1

; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
; --- snip ----


PS : I just learnt something useful about this algorithm and ones with similar sized loop code, they are sensitive to alignment. I tested both the version here with Eko's optimisation and a similar one using his optimisations and if you use ALIGN 8 or 16 before the label "Pre_Scan1", the speed increases about 15%.
Posted on 2002-04-16 21:34:36 by hutch--
"aligned code does not matter on anything recent"
Oh well. You live, you learn ;)
Posted on 2002-04-17 06:41:16 by f0dder
Not only it matters a lot.. but I suggest to who wrote "aligned code does not matter on anything recent" to test the speed of a very "self-optimizing" CPU like Athlon on a innerloop when it's aligned on a even address or on a odd one.
Posted on 2002-04-17 06:52:40 by Maverick
I have learnt a long time ago to trust the clock more than dogma, I have written code that I have deliberately misaligned and the timings were no different, other code I have found that the timings were variable depending on the code added in the proc before it.

With the two algos, Eko's original and my own version using part of his optimisation and some more of my own that both byte intensive loops could be efected by code placed before them. ALIGN 4 did not help, ALIGN 8 partially helped and ALIGN 16 fixed it with both at the same time.

Does the clock tell lies ?

Ho Hum.

hutch@movsd.com

PS : Optimisation testing on a PIII, could not be bothered turning on the PIV.
Posted on 2002-04-17 08:31:18 by hutch--
hii
hutch:i'm honoured to be complimented by you :)

in my last design. i'm scaning backwards . and thought if i scan backwards . and i dont find a match . i can add to the counter the len of the pattern instead increment it by one .

i want to add eax,,
so think that i shouldnt dec edx before the push

i have another version, but i'm not at my home right now , i'll post it as soon as i get home .


bye

eko
Posted on 2002-04-17 11:52:41 by eko
Steve and eko,
I'm humbly telling you for the 3d time -
you checking for the first byte in pattern though it is already checked and passed OK.
I showed in my code how to avoid it, you may create your own way to do it, but, please just look more carefully - you do one needless iteration comparing pattern with source.
Posted on 2002-04-17 18:37:37 by The Svin
Alex,

You are probably right but its the loop code that I want to get right at the moment and I am very pleased with the optimisation suggestions that Eko has made as it has benchmarked faster.

When the loop code is finalised, chasing code size efficiency in the prologue and epilogue code will then make sense. i will certainly make use of your code design then as it reduces the size of the code.

The only problem with the suggestion is that with the exit from the scan loop being dependent on the zero flag, a search for a single byte causes it to go past zero. The suggestion you have made is a good one as it allows a much faster early out with a mismatch on the next character and this improves partial match recovery but the algo is not fully optimised yet.

Regards and thanks for your contribution,

hutch@movsd.com
Posted on 2002-04-17 23:38:03 by hutch--
hiii ., i have two versions .

 

InString1 proc startpos:DWORD,lpSource:DWORD,lpPattern:DWORD
push ebx
push esi
push edi


invoke StrLen,lpPattern ; pattern length
push eax ;because of strlen
invoke StrLen,lpSource ; mov sLen, eax ; source length

mov ebx,startpos
pop edx

cmp ebx,1
jb @ER
cmp eax,ebx
jg @F
@ER:
mov eax, -2
jmp isOut ; exit if startpos is past end OR exit if startpos not 1 or greater
@@:
sub eax,edx
jg @F
mov eax, -1
jmp isOut ; exit if pattern longer than source
@@:



mov sLen, eax
; sub eax,ebx ; add the start offset
; ----------------
; setup loop code
; ----------------
mov edi, lpPattern
mov esi, lpSource
;

add esi,eax


neg eax ; invert sign
push edx
add eax,ebx

mov cl,[edi] ; get 1st char in pattern
sub eax,edx

the begining of the code is the same for the two version ^

version one :(only the loops)




add_len:
mov ebx,[esp]
add eax,ebx
Scan_Loop:
cmp cl, [esi+eax]
je Pre_Match
;Pre_Scan: no need for this
inc eax
jnz Scan_Loop ; the loop set eax to set if not found


jmp @loopout


Pre_Match:
lea edx,[ebx-1] ; get edx back instead pop and push
lea ebx,[esi+eax]
Test_Match:
mov ch, [edi+edx]
cmp ch, [ebx+edx]
jne add_len ; jump back on mismatch
dec edx
jnz Test_Match

add eax,sLen
inc eax



version two (only the loops)
 

add_len:
mov ebx,[esp]
add eax,ebx ; still have problem in here . (stall - big one i think)
Scan_Loop:
cmp cl, [esi+eax]
je Pre_Match
;Pre_Scan: no need for this
inc eax
jnz Scan_Loop ; the loop set eax to set if not found


jmp @loopout


Pre_Match:
lea edx,[ebx-1] ; get edx back instead pop and push
mov ch, [edi+ebx-1] ; dont put edx. no stall
lea ebx,[esi+eax]
Test_Match:
cmp ch, [ebx+edx]
jne add_len ; jump back on mismatch
dec edx
mov ch, [edi+edx]
jnz Test_Match

add eax,sLen
inc eax


bye

eko
Posted on 2002-04-18 11:51:27 by eko