Ok, is this the real KMP? I just want to know before I do some optimizin' and put it into a real application.




[size=9]
KMP PROC USES ebx esi edi StringSource:DWORD, SourceLength:DWORD, StringPattern:DWORD, PatternLength:DWORD, ShiftTable:DWORD

;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;Generate a shift table
;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

;Initialization before generating shift table

xor ecx, ecx
mov ebx, PatternLength
mov eax, -1
mov esi, StringPattern
mov edi, ShiftTable
mov DWORD PTR [edi], eax

;Start generating

@@GENERATE_TABLE:

cmp ecx, ebx
je @@TABLE_FINISHED

@@INNER_TABLE_LOOP:

test eax, eax
js @@CONTINUE_TABLE
mov dl, BYTE PTR [esi+ecx]
cmp dl, BYTE PTR [esi+eax]
je @@CONTINUE_TABLE
mov eax, DWORD PTR [edi+eax*4]
jmp @@INNER_TABLE_LOOP

@@CONTINUE_TABLE:

inc ecx
inc eax
mov dl, BYTE PTR [esi+ecx]
cmp dl, BYTE PTR [esi+eax]
jne @@SHIFTTABLE_VALUE_IS_EAX
mov edx, DWORD PTR [edi+eax*4]
mov DWORD PTR [edi+ecx*4], edx
jmp @@GENERATE_TABLE

@@SHIFTTABLE_VALUE_IS_EAX:

mov DWORD PTR [edi+ecx*4], eax
jmp @@GENERATE_TABLE

@@TABLE_FINISHED:

;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; Displays the shift table
;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;
;xor ecx, ecx
;
;@@:
;
; cmp ecx, ebx
; ja @f
; push ecx
; push ebx
; push edi
; invoke dwtoa, DWORD PTR [edi+ecx*4], OFFSET gbGBuffer
; invoke MessageBox, 0, OFFSET gbGBuffer, 0, 0
; pop edi
; pop ebx
; pop ecx
; inc ecx
; jmp @b
;
;@@:
;
;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;KMP routine
;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

;Initialization before KMP search

push ebx
mov eax, SourceLength
push eax

xor eax, eax
xor ecx, ecx
xor edx, edx
mov ebx, ShiftTable
mov esi, StringSource
mov edi, StringPattern

;Start KMP

@@KMP_SEARCH:

cmp ecx, DWORD PTR [esp]
je @@KMP_END
cmp eax, DWORD PTR [esp+4]
je @@KMP_END

@@INNER_KMP:

test eax, eax
js @@KMP_CONTINUE
mov dl, BYTE PTR [esi+ecx]
cmp dl, BYTE PTR [edi+eax]
je @@KMP_CONTINUE
mov eax, [ebx][eax*4]
jmp @@INNER_KMP

@@KMP_CONTINUE:

inc eax
inc ecx
jmp @@KMP_SEARCH

@@KMP_END:

pop edx
pop edx

cmp eax, edx
jne @@STRING_NOT_FOUND

sub ecx, edx
mov eax, ecx
ret

@@STRING_NOT_FOUND:

mov eax, -1
ret

KMP ENDP
[/size]



Return Value: EAX

If the string is not found, it will return -1 else it will return the position of the string.

The ShiftTable parameter is a dynamically allocated dword size array with the number of elements based on the pattern length. I tested this one on almost any string combo I can think of, and everything works perfectly. I just want to know If I did it right this time.

Thanks!!! :)

P.S. It would be much better If I move this post to a new thread to reduce the confusion between hybrid and the real KMP. Also the thread "Hybrid KMP" I so convoluted with codes and would make it a little perplexing for readers which is the right or wrong KMP string search algorithm.
Posted on 2002-03-09 15:37:12 by stryker
I updated it quite a bit, this is a little bit optimized than the first one. Here's the code.




[size=9]
KMP PROC USES ebx esi edi StringSource:DWORD, SourceLength:DWORD, StringPattern:DWORD, PatternLength:DWORD, ShiftTable:DWORD

mov eax, PatternLength
push eax
mov eax, SourceLength
push eax
mov eax, -1
mov ebx, ShiftTable
xor ecx, ecx
mov [ebx][ecx*4], eax
mov esi, StringSource
mov edi, StringPattern

@@GENERATE_TABLE:

cmp ecx, DWORD PTR [esp+4]
je @@TABLE_FINISHED

@@INNER_TABLE_LOOP:

test eax, eax
js @@CONTINUE_TABLE
mov dl, BYTE PTR [edi+ecx]
cmp dl, BYTE PTR [edi+eax]
je @@CONTINUE_TABLE
mov eax, [ebx][eax*4]
jmp @@INNER_TABLE_LOOP

@@CONTINUE_TABLE:

inc ecx
inc eax
mov dl, BYTE PTR [edi+ecx]
cmp dl, BYTE PTR [edi+eax]
jne @@SHIFTTABLE_VALUE_IS_EAX
mov edx, [ebx][eax*4]
mov [ebx][ecx*4], edx
jmp @@GENERATE_TABLE

@@SHIFTTABLE_VALUE_IS_EAX:

mov [ebx][ecx*4], eax
jmp @@GENERATE_TABLE

@@TABLE_FINISHED:

xor eax, eax
xor ecx, ecx

@@KMP_SEARCH:

cmp eax, DWORD PTR [esp]
je @@KMP_END
cmp ecx, DWORD PTR [esp+4]
je @@KMP_END

@@INNER_KMP:

test ecx, ecx
js @@KMP_CONTINUE
mov dl, BYTE PTR [esi+eax]
cmp dl, BYTE PTR [edi+ecx]
je @@KMP_CONTINUE
mov ecx, [ebx][ecx*4]
jmp @@INNER_KMP

@@KMP_CONTINUE:

inc eax
inc ecx
jmp @@KMP_SEARCH

@@KMP_END:

pop edx
pop edx

cmp ecx, edx
jne @@STRING_NOT_FOUND

sub eax, edx
ret

@@STRING_NOT_FOUND:

mov eax, -1
ret

KMP ENDP
[/size]


Hmm, seems like no one is objecting to the code. :) So this must be the real KMP. :)

One question though: Is it slow to compare from the stack? I mean like cmp eax, DWORD PTR ...?
Posted on 2002-03-09 23:07:06 by stryker
stryker, looks really good. Notice how the preprocessing phase is like the search phase? Here is the code I'm using for the preprocessing. Maybe it can help in optimizing?
;     char_x  -  pointer to string to find

; int_m - length of string
; int_kmpNext - pointer to FSM buffer (int_m * 4 bytes)
preKMP MACRO char_x:REQ, int_m:REQ, int_kmpNext:REQ
LOCAL preLoop, _0, _1

xor int_i, int_i
or DWORD PTR [int_kmpNext], -1
preLoop:
mov int_j, [int_kmpNext + int_i*4]
inc int_i
inc int_j
mov al, [char_x + int_i - 1]
mov [int_kmpNext + int_i*4], int_j
_0: dec int_j
js _1
cmp al, [char_x + int_j]
je _1
mov int_j, [int_kmpNext + int_j*4]
inc int_j
mov [int_kmpNext + int_i*4], int_j
jmp _0
_1: cmp int_i, int_m
jne preLoop
ENDM
The parameters to the macro are all registers:
int_i TEXTEQU <edx> ; scratch registers

int_j TEXTEQU <ecx>

preKMP esi, edi, ebx
Posted on 2002-03-09 23:36:59 by bitRAKE
Yeah, there are some similarities. Thanks for the code, I'll look into that macro.
Posted on 2002-03-09 23:42:20 by stryker
Here's a proggy that performs the routine above.

Controls:

FIND - Always search at the first instance at the beginning of the string.
FIND NEXT - Continues to find the text what FIND has started.

Place your search terms on the small edit box and the source string on the other.

I'll leave more optimizations to those who wants to use it. I'll no longer be updating or optimizing this one because I have another project in mind.

Yay!!! we now have an ASM version of the KMP algo. :)

I'll release soon my own string search algo: ssrch :) -> this is the naive algorithm with skipping inner iteration feature, this would solve the preprocessing stage problem of the BM and KMP.


BitRAKE,

I wasn't able to look at your code closely, cause I was busy :)


run the .bat file to produce the exe :)
Posted on 2002-03-12 12:02:44 by stryker
Ooops, I forgot the documentation. I'll just crudely paste this here. If it's OK for everybody. :)



[b]Name Of Procedure:[/b] KMP
[b]Parameters:[/b]

StringSource - A pointer to a null terminated string
SourceLength - length of the string source
StringPattern - A pointer to a null terminated string
PatternLength - length of the string pattern
ShiftTable - A pointer to an array of dwords.

[b]Note:[/b]

ShiftTable is best suited to be a dynamically allocated
dword size array. The number of bytes to allocate is
calculated using this mathematical equation:

# of bytes to allocate = (PatternLength+1)*4

[b]Prototype:[/b] KMP PROTO :DWORD, :DWORD, :DWORD, :DWORD, :DWORD

[b]Return Values:[/b]

-1 String not found
N >= 0 Position of the string.




I found a bug, just add this code after the @@BTN_FINDNEXT: label



cmp FlagFind, 0h
je @@STRING_NOT_FOUND


You'll get an error if you haven't had clicked the FIND button. This only happens if you haven't started a search yet. :) But it's no big deal, just small squirmy bugs...
Posted on 2002-03-12 14:10:09 by stryker