I'm working on a few helpful string procedures (due to all the parsing questions -- including questions that I had). I hope this helps someone -- also optimization points are welcome.

3 procedures so far:

Find_First_Of
Find_Last_Of
Find_First_Not_Of
Required arguments:
1) the string to be searched
2) the separators to be found
3) the starting position

How it works:
1) Find_First_Of
returns the first instance of a given separator
Assuming the sentence was "Hello everyone how are you doing"
and the separator was " " (a space), it would return 5 in eax,
because positions are counted from 0 (the first letter of the string is at position 0).

2)Find_Last_Of
finds the last occurrence of a separator

3)Find_First_Not_Of
finds the first occurrence of something that's not a separator



; #########################################################################
;
; Find First Of / Find_Last_Of / Find_First_Not_Of
; Suppose you had a string -- a paragraph of prose, perhaps -- and you wanted
; to break it up into individual words. You would need to find where the
; separators were, and those could be any of a number of different characters;
; there could be spaces, commas, periods, colons and so on. These procedures
; find where any one of a given set of characters occurs in a string -- this
; tells you where the delimiters for the words are. I hope this makes someone's
; life a little easier :-) Cheers, Walter Reid (Sliver)
;
;
; Works like this:
; invoke Find_First_Of, string to be searched, separators, starting position
; returns the location of the first separator in eax
;
; invoke Find_Last_Of, string to be searched, separators, starting position
; returns the location of the last separator in eax
; #########################################################################

.386
.model flat, stdcall
option casemap :none ; case sensitive

; #########################################################################

include \masm32\include\windows.inc

include \masm32\include\user32.inc
include \masm32\include\kernel32.inc
include \masm32\include\masm32.inc
include \masm32\include\debug.inc

includelib \masm32\lib\user32.lib
includelib \masm32\lib\kernel32.lib
includelib \masm32\lib\masm32.lib
includelib \masm32\lib\debug.lib

; Prototypes so that INVOKE can type-check calls made before (or after) the
; procedure bodies appear in the file.
Main PROTO
Find_First_Of PROTO :DWORD, :DWORD, :DWORD
Find_Last_Of PROTO :DWORD, :DWORD, :DWORD
Find_First_Not_Of PROTO :DWORD, :DWORD, :DWORD

; The procedure bodies below are instructions, so a code segment has to be
; open before them; without this directive MASM rejects them as being outside
; any segment block.
.code

; -------------------------------------------------------------------------
; Find_Last_Of
;
; Finds the LAST character of lpszSource, at or after StartPos, that is equal
; to any one of the characters in lpszTarget.
;
; In:    lpszSource  -> zero-terminated string to search
;        lpszTarget  -> zero-terminated set of separator characters
;        StartPos    =  zero-based index at which the search begins; it must
;                       not be greater than the length of lpszSource (this is
;                       not checked)
; Out:   eax = zero-based index of the last separator, counted from the
;              start of lpszSource (not from StartPos), or -1 if no character
;              at or after StartPos is a separator (this includes the case
;              of an empty lpszTarget)
; Clobb: ecx, edx, flags.  esi and edi are saved/restored via USES, as the
;        stdcall convention requires.
; -------------------------------------------------------------------------
Find_Last_Of proc uses esi edi lpszSource:DWORD, lpszTarget:DWORD, StartPos:DWORD

    mov     esi, lpszSource
    add     esi, StartPos               ; esi -> character being examined
    mov     ecx, -1                     ; ecx = index of latest match, -1 = none yet

next_char:
    mov     al, byte ptr [esi]
    test    al, al
    jz      done                        ; reached the end of the source string
    mov     edi, lpszTarget             ; restart the separator list for this char

next_sep:
    mov     dl, byte ptr [edi]
    test    dl, dl
    jz      advance                     ; list exhausted: this char is no separator
    inc     edi
    cmp     al, dl
    jne     next_sep

    mov     ecx, esi                    ; separator found: remember its index and
    sub     ecx, lpszSource             ; keep going, a later one may replace it

advance:
    inc     esi
    jmp     next_char

done:
    mov     eax, ecx
    ret
Find_Last_Of endp


; -------------------------------------------------------------------------
; Find_First_Of
;
; Finds the FIRST character of lpszSource, at or after StartPos, that is
; equal to any one of the characters in lpszTarget.
;
; In:    lpszSource  -> zero-terminated string to search
;        lpszTarget  -> zero-terminated set of separator characters
;        StartPos    =  zero-based index at which the search begins; it must
;                       not be greater than the length of lpszSource (this is
;                       not checked)
; Out:   eax = zero-based index of the first separator, counted from the
;              start of lpszSource (not from StartPos), or -1 if no character
;              at or after StartPos is a separator (this includes the case
;              of an empty lpszTarget)
; Clobb: edx, flags.  esi and edi are saved/restored via USES, as the
;        stdcall convention requires.
;
; The source is walked exactly once; each character is compared against the
; whole separator list, so the routine always terminates and has no limit on
; the length of the source string.
; -------------------------------------------------------------------------
Find_First_Of proc uses esi edi lpszSource:DWORD, lpszTarget:DWORD, StartPos:DWORD

    mov     esi, lpszSource
    add     esi, StartPos               ; esi -> character being examined

next_char:
    mov     al, byte ptr [esi]
    test    al, al
    jz      not_found                   ; end of source reached without a hit
    mov     edi, lpszTarget             ; restart the separator list for this char

next_sep:
    mov     dl, byte ptr [edi]
    test    dl, dl
    jz      advance                     ; list exhausted: this char is no separator
    inc     edi
    cmp     al, dl
    jne     next_sep

    mov     eax, esi                    ; first separator: convert pointer to index
    sub     eax, lpszSource
    ret

advance:
    inc     esi
    jmp     next_char

not_found:
    mov     eax, -1                     ; "no such character" marker
    ret
Find_First_Of endp

; -------------------------------------------------------------------------
; Find_First_Not_Of
;
; Finds the FIRST character of lpszSource, at or after StartPos, that is NOT
; one of the characters in lpszTarget.
;
; In:    lpszSource  -> zero-terminated string to search
;        lpszTarget  -> zero-terminated set of separator characters
;        StartPos    =  zero-based index at which the search begins; it must
;                       not be greater than the length of lpszSource (this is
;                       not checked)
; Out:   eax = zero-based index of the first non-separator, counted from the
;              start of lpszSource (not from StartPos), or -1 if every
;              character from StartPos to the end is a separator (or the
;              search starts at the terminator).  With an empty lpszTarget
;              every character qualifies, so StartPos itself is returned for
;              a non-empty remainder.
; Clobb: edx, flags.  esi and edi are saved/restored via USES, as the
;        stdcall convention requires.
; -------------------------------------------------------------------------
Find_First_Not_Of proc uses esi edi lpszSource:DWORD, lpszTarget:DWORD, StartPos:DWORD

    mov     esi, lpszSource
    add     esi, StartPos               ; esi -> character being examined

next_char:
    mov     al, byte ptr [esi]
    test    al, al
    jz      not_found                   ; ran out of source: only separators seen
    mov     edi, lpszTarget             ; restart the separator list for this char

next_sep:
    mov     dl, byte ptr [edi]
    test    dl, dl
    jz      found                       ; matched none of the separators
    inc     edi
    cmp     al, dl
    jne     next_sep

    inc     esi                         ; it is a separator: skip it
    jmp     next_char

found:
    mov     eax, esi                    ; convert pointer to index
    sub     eax, lpszSource
    ret

not_found:
    mov     eax, -1                     ; "no such character" marker
    ret
Find_First_Not_Of endp
; #########################################################################

.data
; Test sentence searched by the Find_First_Of / Find_Last_Of demo calls.
Msg1 db "Hi! My name is Walter. How are you?",0
; Test string searched by the Find_First_Not_Of demo calls.
Msg2 db "aeioufaefeaio",0
; Separator set: space and the punctuation marks ? . ! ,
Txt db " ?.!,",0
; Separator set: the five lower-case vowels.
Txt2 db "uoaei",0
; #########################################################################


.code

; Program entry point (named by "end start"): run the demo, then terminate
; the process with exit code 0.
start:
invoke Main
invoke ExitProcess,0

; -------------------------------------------------------------------------
; Main
;
; Demo driver: calls each search procedure twice on the test strings and
; prints a description followed by the value returned in eax.  PrintDec is
; issued after the PrintText lines, so it relies on those macros leaving eax
; untouched.
; -------------------------------------------------------------------------
Main proc

    ; --- Find_First_Of, punctuation set, whole sentence (prints 2: the '!')
    invoke  Find_First_Of, offset Msg1, offset Txt, 0
    PrintText "Find the first separator ( ?.!,) -- starting at pos 0"
    PrintText "in the sentance 'Hi! My name is Walter. How are you?'"
    PrintDec eax
    PrintText "Value returned is from the first character (0)"

    PrintText " "
    PrintText " "

    ; --- Find_First_Of, vowel set, from index 14 (prints 16: 'a' of "Walter")
    invoke  Find_First_Of, offset Msg1, offset Txt2, 14
    PrintText "Find the first vowel (uoaei) -- starting at pos 14 (space after 'is')"
    PrintText "in the sentance 'Hi! My name is Walter. How are you?'"
    PrintDec eax
    PrintText "Value returned is from the first character (0)"

    PrintText " "
    PrintText " "
    PrintText " "
    PrintText " "

    ; --- Find_Last_Of, punctuation set (prints 34: the final '?')
    invoke  Find_Last_Of, offset Msg1, offset Txt, 0
    PrintText "Find the last separator ( ?.!,) -- starting at pos 0"
    PrintText "in the sentance 'Hi! My name is Walter. How are you?'"
    PrintDec eax
    PrintText "Value returned is from the first character (0)"

    PrintText " "
    PrintText " "

    ; --- Find_Last_Of, vowel set (prints 33: 'u' of "you")
    invoke  Find_Last_Of, offset Msg1, offset Txt2, 0
    PrintText "Find the last vowel (uoaei) -- starting at pos 0"
    PrintText "in the sentance 'Hi! My name is Walter. How are you?'"
    PrintDec eax
    PrintText "Value returned is from the first character (0)"

    PrintText " "
    PrintText " "
    PrintText " "
    PrintText " "

    ; --- Find_First_Not_Of, vowel set, from index 0 (prints 5: first 'f')
    invoke  Find_First_Not_Of, offset Msg2, offset Txt2, 0
    PrintText "Find the first not of separator (uoaei) -- starting at pos 0"
    PrintText "in the sentance 'aeioufaefeaio'"
    PrintDec eax
    PrintText "Value returned is from the first character (0)"

    PrintText " "
    PrintText " "

    ; --- Find_First_Not_Of, vowel set, from index 6 (prints 8: second 'f')
    invoke  Find_First_Not_Of, offset Msg2, offset Txt2, 6
    PrintText "Find the first not of separator (uoaei) -- starting at pos 6"
    PrintText "in the sentance 'aeioufaefeaio'"
    PrintDec eax
    PrintText "Value returned is from the first character (0)"

    ret

Main endp

end start







Posted on 2002-02-03 11:00:33 by Sliver
Very nice indeed, here's another useful function:

; -------------------------------------------------------------------------
; ParseString
;
; Copies the next run of ASCII letters/digits ("word") found in pStr at or
; after index sPos into pBuf and zero-terminates it.  Any characters that are
; not in 'a'-'z', 'A'-'Z' or '0'-'9' in front of the word are skipped.
;
; In:    pStr -> zero-terminated source string
;        sPos =  zero-based index at which parsing starts
;        pBuf -> destination buffer.  The copy is NOT bounded: the buffer
;                must be able to hold the longest word plus the terminator.
; Out:   eax = index (from the start of pStr) of the character that ended
;              the word, i.e. the value to pass as sPos for the next call.
;              When no word is left, pBuf receives an empty string and eax
;              is the index of the string terminator.
; Clobb: ecx, edx, flags.  ebx, esi, edi are preserved via USES.
; -------------------------------------------------------------------------
ParseString Proc uses ebx esi edi pStr:DWORD,sPos:DWORD,pBuf:DWORD

;; InRange a,b,c : ORs into ebx a value whose sign bit is set exactly when
;; b <= a <= c.  (a-b) and (a-c-1) have different signs only inside the
;; range, so the sign bit of their XOR is the "in range" flag.
InRange MACRO a,b,c
    lea ecx,[a-b]
    lea edx,[a-c-1]
    xor edx,ecx
    or ebx,edx
EndM

;; Ranges : after expansion the sign flag (from the final OR) is set when
;; eax is a lower-case letter, a digit or an upper-case letter.
;; ebx must be zero on entry.
Ranges MACRO
    InRange eax,'a','z'
    InRange eax,'0','9'
    InRange eax,'A','Z'
EndM

    mov esi,pStr
    mov edi,pBuf
    add esi,sPos
    assume esi:ptr byte
    assume edi:ptr byte

    ; Phase 1: skip everything that is not a letter or digit.
@@: movzx eax,[esi]
    xor ebx,ebx
    test eax,eax
    jz nlb                      ; end of string before any word started
    Ranges
    js @F                       ; first character of a word found
    inc esi
    jmp @B

    ; Phase 2: copy characters while they are letters or digits.
@@: mov [edi],al
    inc esi
    inc edi

    movzx eax,[esi]
    xor ebx,ebx
    test eax,eax
    jz nlb                      ; end of string terminates the word
    Ranges
    js @B

nlb:mov byte ptr [edi],0        ; terminate the copied word
    mov eax,esi
    sub eax,pStr                ; pointer -> index of the stopping character

    ; Cancel the register assumptions so they do not silently apply to
    ; whatever code is assembled after this procedure.
    assume esi:nothing
    assume edi:nothing
    ret
ParseString EndP


Usage is simple: call the function with a pointer to the string you wish to parse, the start position, and a pointer to a buffer that will receive the parsed part.

.data
szTest db "This is a test",0

.data?
Pos dd ?
Buf db 64 dup (?)

.code
; First call starts at index 0; the returned index is where the word ended.
Invoke ParseString,addr szTest,0,addr Buf
mov Pos,eax ; Buf contains "This",0 and Pos = 4

; Feeding the returned index back in continues with the next word.
Invoke ParseString,addr szTest,Pos,addr Buf
mov Pos,eax ; Buf contains "is",0 and Pos = 7

...
...

Hope someone finds it helpful.
Posted on 2002-02-03 11:32:01 by Eóin
This isn't really a "gem", nor is it helpful, but it's fun :grin: he! he! he!

String Reverse
Output: dlrow leurc olleh



; String reverse: copies mystringdata back-to-front into buffer and shows the
; result in a message box.  Expected output: "dlrow leurc olleh".
.386
.MODEL flat, stdcall
option casemap:none

INCLUDE \masm32\include\windows.inc
INCLUDE \masm32\include\kernel32.inc
INCLUDELIB \masm32\lib\kernel32.lib
INCLUDE \masm32\include\user32.inc
INCLUDELIB \masm32\lib\user32.lib

.data

mystringdata db "hello cruel world", 0
; Sized from the source string (characters + terminator) so that editing the
; literal above can never overflow the destination.
buffer db SIZEOF mystringdata DUP(0)

.code

Start:

    invoke lstrlen, OFFSET mystringdata
    mov ecx, eax                        ; ecx = characters still to copy
    mov esi, OFFSET mystringdata        ; esi -> source (read from the end)
    mov edi, OFFSET buffer              ; edi -> destination (written forward)

    test ecx, ecx
    jz reversed                         ; empty string: nothing to copy, and
                                        ; the loop below must not run with 0

@@:
    dec ecx
    mov dl, BYTE ptr [esi+ecx]          ; take the last not-yet-copied character
    mov BYTE ptr [edi], dl
    inc edi
    test ecx, ecx
    jnz @B                              ; index 0 was just copied when ecx = 0

reversed:
    mov BYTE ptr [edi], 0               ; terminate explicitly

    invoke MessageBox, 0, OFFSET buffer, 0, 0
    invoke ExitProcess, 0

END Start


Reverses string until the center character then reverses up the string again.
Output: dlrow leuel world



; In-place "reverse": source and destination are the same buffer, so once the
; write position passes the middle the loop reads characters it has already
; overwritten and the text is mirrored around its centre.
; Output: "dlrow leuel world".
.386
.MODEL flat, stdcall
option casemap:none

INCLUDE \masm32\include\windows.inc
INCLUDE \masm32\include\kernel32.inc
INCLUDE \masm32\include\user32.inc
INCLUDELIB \masm32\lib\kernel32.lib
INCLUDELIB \masm32\lib\user32.lib

.data

mystringdata db "hello cruel world", 0

.code

Start:

    invoke lstrlen, ADDR mystringdata   ; eax = length, used directly as read index
    mov edx, OFFSET mystringdata        ; edx -> write position, moves forward

mirror:
    dec eax                             ; step the read index towards the front
    mov cl, BYTE ptr mystringdata[eax]
    mov BYTE ptr [edx], cl
    inc edx
    test eax, eax
    jnz mirror                          ; stop after index 0 has been read

    mov BYTE ptr [edx], al              ; al is 0 here: write the terminator

    invoke MessageBox, NULL, ADDR mystringdata, NULL, MB_OK
    invoke ExitProcess, 0

END Start


You can remove that mov ecx, eax and change some parts of the code. I love to use ecx in this way :grin:
Posted on 2002-02-03 13:00:31 by stryker
Posted on 2002-02-03 13:54:26 by stryker
umberg6007 ,

Don't be so hasty! I need to reverse strings all the time. So I'll definitely have a look at your routine for any new tricks.

DNA, as you may already know, has two strands that are paired up in a way so that you only have to describe one & the other is the "reverse complement" which you can calculate quite easily by reversing the string then swapping a<-->t & c<-->g. Suffice it to say that there are many situations where it's quicker to actually do this calculation up front than to fudge it on the fly.
Posted on 2002-02-03 17:14:16 by rafe
rafe,

DNA sounds like a lot of fun in the string parsing area, tons of room for creative thinking in byte twiddling.

Regards,

hutch@movsd.com
Posted on 2002-02-03 17:30:19 by hutch--
DNA? I didn't know that it's the same at the opposite end starting at the center, must have slipped my mind. Time to dig up my biology book :grin: he! he! he!
Posted on 2002-02-03 17:49:42 by stryker
truth is I'm over my head & drowning & loving it all the same ... But the boss likes the work so far & I keep the post-docs in line... mostly. I only had one formal programming course in my life & I'm already having to deal with big-O, wacked algos on strings & trees on a daily basis... data mining (what me gloat?)

String stuff is a must in bio-comp but the field is rapidly moving to Diff eq systems to model the molecules. :grin: more to learn

Unfortunately, I've got little time to actually code in asm but the creative stuff & learning is fun. Work's a handful & the current home situation doesn't allow me much down time right now.

hutch--, Oh yea, that string search variant I was rambling on about a few weeks back is already in the lit... no IP issues. more later in another thread.

mmm.... not from the center oops ! caught me skimming. humble apologies. but restriction enzymes look for....
Posted on 2002-02-03 18:06:01 by rafe