I'm working on a few helpful string procedures (due to all the parsing questions -- including questions that I had). I hope this helps someone -- also optimization points are welcome.
3 procedures so far:
Find_First_Of
Find_Last_Of
Find_First_Not_Of
Required arguments:
1) the string to be searched
2) the separators to be found
3) the starting position
How it works:
1) Find_First_Of
returns the first instance of a given separator
assuming the sentence was: "Hello everyone how are you doing"
and the separator was " " (a space), it would return 5 in eax
first letter in the string is at starting position (0)
2) Find_Last_Of
finds the last occurrence of a separator
3) Find_First_Not_Of
finds the first occurrence of something that's not a separator
3 procedures so far:
Find_First_Of
Find_Last_Of
Find_First_Not_Of
Required arguments:
1) the string to be searched
2) the separators to be found
3) the starting position
How it works:
1) Find_First_Of
returns the first instance of a given separator
assuming the sentence was: "Hello everyone how are you doing"
and the separator was " " (a space), it would return 5 in eax
first letter in the string is at starting position (0)
2) Find_Last_Of
finds the last occurrence of a separator
3) Find_First_Not_Of
finds the first occurrence of something that's not a separator
; #########################################################################
;
; Find First Of / Find_Last_Of / Find_First_Not_Of
; Suppose you had a string -- a paragraph of prose, perhaps -- and you wanted
; to break it up into individual words. You would need to find where the
; separators were, and those could be any of a number of different characters;
; there could be spaces, commas, periods, colons and so on. These procedures
; report where any one of a given set of characters occurs in a string -- this
; tells you where the delimiters for the words are. I hope this makes someone's
; life a little easier :-) Cheers, Walter Reid (Sliver)
;
;
; Works like this:
; invoke Find_First_Of, string to be searched, separators, starting position
; returns the location of the first separator in eax
;
; invoke Find_Last_Of, string to be searched, separators, starting position
; returns the location of the last separator in eax
; #########################################################################
.386
.model flat, stdcall
option casemap :none ; case sensitive
; #########################################################################
include \masm32\include\windows.inc
include \masm32\include\user32.inc
include \masm32\include\kernel32.inc
include \masm32\include\masm32.inc
include \masm32\include\debug.inc
includelib \masm32\lib\user32.lib
includelib \masm32\lib\kernel32.lib
includelib \masm32\lib\masm32.lib
includelib \masm32\lib\debug.lib
Main PROTO
Find_First_Of PROTO :DWORD, :DWORD, :DWORD
Find_Last_Of PROTO :DWORD, :DWORD, :DWORD
Find_First_Not_Of PROTO :DWORD, :DWORD, :DWORD
; -------------------------------------------------------------------------
; Find_Last_Of
;   Scans the zero-terminated string lpszSource, beginning at byte offset
;   StartPos, and reports the LAST character that is a member of the
;   zero-terminated character set lpszTarget.
;
;   In:   lpszSource = address of the string to search
;         lpszTarget = address of the set of separator characters
;         StartPos   = offset of the first character to examine; the caller
;                      must keep it within the string (it is not checked)
;   Out:  eax = index of the last match, counted from the first character
;               of lpszSource (index 0), or -1 when no character at or
;               after StartPos belongs to the set (always -1 for an empty
;               set)
;   Regs: ecx, edx and flags are clobbered; esi and edi are preserved by
;         USES, as the stdcall convention requires.
; -------------------------------------------------------------------------
Find_Last_Of proc uses esi edi lpszSource:DWORD, lpszTarget:DWORD, StartPos:DWORD
    mov     esi, lpszSource
    add     esi, StartPos               ; esi -> character being examined
    mov     eax, -1                     ; result so far: nothing found

next_char:
    mov     cl, byte ptr [esi]
    test    cl, cl
    jz      done                        ; end of source: eax holds the answer

    mov     edi, lpszTarget             ; restart the set for every character
next_sep:
    mov     dl, byte ptr [edi]
    test    dl, dl
    jz      advance                     ; set exhausted: cl is not a separator
    inc     edi
    cmp     cl, dl
    jne     next_sep

    mov     eax, esi                    ; later matches overwrite earlier ones,
    sub     eax, lpszSource             ; so the final value is the last match

advance:
    inc     esi
    jmp     next_char

done:
    ret
Find_Last_Of endp
; -------------------------------------------------------------------------
; Find_First_Of
;   Scans the zero-terminated string lpszSource, beginning at byte offset
;   StartPos, and reports the FIRST character that is a member of the
;   zero-terminated character set lpszTarget.
;
;   In:   lpszSource = address of the string to search
;         lpszTarget = address of the set of separator characters
;         StartPos   = offset of the first character to examine; the caller
;                      must keep it within the string (it is not checked)
;   Out:  eax = index of the first match, counted from the first character
;               of lpszSource (index 0), or -1 when no character at or
;               after StartPos belongs to the set (always -1 for an empty
;               set)
;   Regs: edx and flags are clobbered; esi and edi are preserved by USES,
;         as the stdcall convention requires.
;
;   The source is walked exactly once and there is no length limit: the
;   scan stops at the first match or at the terminating zero.
; -------------------------------------------------------------------------
Find_First_Of proc uses esi edi lpszSource:DWORD, lpszTarget:DWORD, StartPos:DWORD
    mov     esi, lpszSource
    add     esi, StartPos               ; esi -> character being examined

next_char:
    mov     al, byte ptr [esi]
    test    al, al
    jz      not_found                   ; hit the end without a match

    mov     edi, lpszTarget             ; restart the set for every character
next_sep:
    mov     dl, byte ptr [edi]
    test    dl, dl
    jz      advance                     ; set exhausted: al is not a separator
    inc     edi
    cmp     al, dl
    jne     next_sep

    mov     eax, esi                    ; match: convert pointer to an index
    sub     eax, lpszSource
    jmp     done

advance:
    inc     esi
    jmp     next_char

not_found:
    mov     eax, -1

done:
    ret
Find_First_Of endp
; -------------------------------------------------------------------------
; Find_First_Not_Of
;   Scans the zero-terminated string lpszSource, beginning at byte offset
;   StartPos, and reports the first character that is NOT a member of the
;   zero-terminated character set lpszTarget.
;
;   In:   lpszSource = address of the string to search
;         lpszTarget = address of the set of separator characters
;         StartPos   = offset of the first character to examine; the caller
;                      must keep it within the string (it is not checked)
;   Out:  eax = index of the first non-member, counted from the first
;               character of lpszSource (index 0), or -1 when every
;               character from StartPos to the end belongs to the set.
;               With an empty set the character at StartPos itself is
;               reported (or -1 if StartPos is at the terminating zero).
;   Regs: edx and flags are clobbered; esi and edi are preserved by USES,
;         as the stdcall convention requires.
; -------------------------------------------------------------------------
Find_First_Not_Of proc uses esi edi lpszSource:DWORD, lpszTarget:DWORD, StartPos:DWORD
    mov     esi, lpszSource
    add     esi, StartPos               ; esi -> character being examined

next_char:
    mov     al, byte ptr [esi]
    test    al, al
    jz      not_found                   ; only separators up to the end

    mov     edi, lpszTarget             ; restart the set for every character
next_sep:
    mov     dl, byte ptr [edi]
    test    dl, dl
    jz      found                       ; set exhausted: al is not in it
    inc     edi
    cmp     al, dl
    jne     next_sep

    inc     esi                         ; al is a separator: skip it
    jmp     next_char

found:
    mov     eax, esi                    ; convert pointer to an index
    sub     eax, lpszSource
    jmp     done

not_found:
    mov     eax, -1

done:
    ret
Find_First_Not_Of endp
; #########################################################################
.data
Msg1 db "Hi! My name is Walter. How are you?",0
Msg2 db "aeioufaefeaio",0
Txt db " ?.!,",0
Txt2 db "uoaei",0
; #########################################################################
.code
start:
invoke Main
invoke ExitProcess,0
; -------------------------------------------------------------------------
; Main
;   Demo driver: calls each search procedure on the sample strings from
;   the .data section and prints every result with PrintText / PrintDec.
;   Takes no arguments.
;   NOTE(review): each PrintDec eax comes after two PrintText calls, so the
;   printed value is only the procedure's result if PrintText leaves eax
;   untouched -- confirm against debug.inc.
; -------------------------------------------------------------------------
Main proc
; --- Find_First_Of: separator set Txt, whole string ----------------------
; The first character of Msg1 that is in Txt is '!' at index 2.
invoke Find_First_Of, ADDR Msg1, ADDR Txt, 0
PrintText "Find the first separator ( ?.!,) -- starting at pos 0"
PrintText "in the sentance 'Hi! My name is Walter. How are you?'"
PrintDec eax
PrintText "Value returned is from the first character (0)"
PrintText " "
PrintText " "
; --- Find_First_Of: vowel set Txt2, starting at index 14 -----------------
; The first vowel at or after index 14 is the 'a' of "Walter" at index 16.
invoke Find_First_Of, ADDR Msg1, ADDR Txt2, 14
PrintText "Find the first vowel (uoaei) -- starting at pos 14 (space after 'is')"
PrintText "in the sentance 'Hi! My name is Walter. How are you?'"
PrintDec eax
PrintText "Value returned is from the first character (0)"
PrintText " "
PrintText " "
PrintText " "
PrintText " "
; --- Find_Last_Of: separator set Txt, whole string -----------------------
; The last separator in Msg1 is the closing '?' at index 34.
invoke Find_Last_Of, ADDR Msg1, ADDR Txt, 0
PrintText "Find the last separator ( ?.!,) -- starting at pos 0"
PrintText "in the sentance 'Hi! My name is Walter. How are you?'"
PrintDec eax
PrintText "Value returned is from the first character (0)"
PrintText " "
PrintText " "
; --- Find_Last_Of: vowel set Txt2, whole string --------------------------
; The last vowel in Msg1 is the 'u' of "you" at index 33.
invoke Find_Last_Of, ADDR Msg1, ADDR Txt2, 0
PrintText "Find the last vowel (uoaei) -- starting at pos 0"
PrintText "in the sentance 'Hi! My name is Walter. How are you?'"
PrintDec eax
PrintText "Value returned is from the first character (0)"
PrintText " "
PrintText " "
PrintText " "
PrintText " "
; --- Find_First_Not_Of: vowel set Txt2, whole string ---------------------
; Msg2 starts with five vowels; the first non-vowel is 'f' at index 5.
invoke Find_First_Not_Of, ADDR Msg2, ADDR Txt2, 0
PrintText "Find the first not of separator (uoaei) -- starting at pos 0"
PrintText "in the sentance 'aeioufaefeaio'"
PrintDec eax
PrintText "Value returned is from the first character (0)"
PrintText " "
PrintText " "
; --- Find_First_Not_Of: vowel set Txt2, starting at index 6 --------------
; From index 6 ("aefeaio") the first non-vowel is the 'f' at index 8.
invoke Find_First_Not_Of, ADDR Msg2, ADDR Txt2, 6
PrintText "Find the first not of separator (uoaei) -- starting at pos 6"
PrintText "in the sentance 'aeioufaefeaio'"
PrintDec eax
PrintText "Value returned is from the first character (0)"
ret
Main endp
end start
Very nice indeed, here's another useful function:
Usage is simple: call the function with a pointer to the string you wish to parse, the start position, and a pointer to a buffer that will receive the parsed part.
.data
szTest db "This is a test",0
.data?
Pos dd ?
Buf db 64 dup (?)
.code
Invoke ParseString,addr szTest,0,addr Buf
mov Pos,eax ; Buf contains "This",0
Invoke ParseString,addr szTest,Pos,addr Buf
mov Pos,eax ; Buf contain "is",0
...
...
Hope someone finds it helpful.
; -------------------------------------------------------------------------
; ParseString
;   Extracts the next token from a zero-terminated string. A token is a run
;   of characters in 'a'-'z', 'A'-'Z' or '0'-'9'; every other character is
;   treated as a delimiter. Delimiters in front of the token are skipped.
;
;   In:   pStr = address of the string to parse
;         sPos = offset into pStr at which to start
;         pBuf = address of the buffer that receives the token plus a
;                terminating zero. The buffer size is NOT checked; it must
;                hold the longest possible token plus one byte.
;   Out:  eax  = offset (from pStr) of the character that ended the token;
;                pass it as sPos in the next call to get the next token.
;                When no token remains, pBuf receives an empty string and
;                eax is the offset of the string's terminating zero.
;   Regs: ecx, edx and flags are clobbered; ebx, esi, edi are preserved by
;         USES.
;
;   Memory operands are written with explicit "byte ptr" rather than
;   ASSUME, so no register assumption is left active after this procedure.
; -------------------------------------------------------------------------
ParseString Proc uses ebx esi edi pStr:DWORD,sPos:DWORD,pBuf:DWORD

; InRange a,b,c -- sets the sign bit of ebx when b <= a <= c.
;   ecx = a-b   is non-negative exactly when a >= b
;   edx = a-c-1 is negative     exactly when a <= c
;   The XOR of the two has its sign bit set only when both conditions hold
;   (outside the range the two values share the same sign).
InRange MACRO a,b,c
    lea     ecx,[a-b]
    lea     edx,[a-c-1]
    xor     edx,ecx
    or      ebx,edx
EndM

; Ranges -- leaves SF set (via the last OR into ebx) when the character in
; eax is a letter or a digit. ebx must be zero on entry.
Ranges MACRO
    InRange eax,'a','z'
    InRange eax,'0','9'
    InRange eax,'A','Z'
EndM

    mov     esi,pStr
    mov     edi,pBuf
    add     esi,sPos                    ; esi -> current source character

; Phase 1: step over delimiters until a token character or the end.
skip_delims:
    movzx   eax,byte ptr [esi]
    xor     ebx,ebx                     ; clear the range accumulator
    test    eax,eax
    jz      finish                      ; end of string, no token found
    Ranges
    js      copy_token                  ; token character: start copying
    inc     esi
    jmp     skip_delims

; Phase 2: copy token characters until a delimiter or the end.
copy_token:
    mov     byte ptr [edi],al
    inc     esi
    inc     edi
    movzx   eax,byte ptr [esi]
    xor     ebx,ebx
    test    eax,eax
    jz      finish
    Ranges
    js      copy_token

finish:
    mov     byte ptr [edi],0            ; zero-terminate the output
    mov     eax,esi
    sub     eax,pStr                    ; pointer -> offset from pStr
    ret
ParseString EndP
Usage is simple: call the function with a pointer to the string you wish to parse, the start position, and a pointer to a buffer that will receive the parsed part.
.data
szTest db "This is a test",0
.data?
Pos dd ?
Buf db 64 dup (?)
.code
Invoke ParseString,addr szTest,0,addr Buf
mov Pos,eax ; Buf contains "This",0
Invoke ParseString,addr szTest,Pos,addr Buf
mov Pos,eax ; Buf contain "is",0
...
...
Hope someone finds it helpful.
This isn't really a "gem", nor is it helpful, but it's fun :grin: he! he! he!
String Reverse
Output: dlrow leurc olleh
Reverses string until the center character then reverses up the string again.
Output: dlrow leuel world
You can remove that mov ecx, eax and change some parts of the code. I love to use ecx in this way :grin:
String Reverse
Output: dlrow leurc olleh
.386
.MODEL flat, stdcall
option casemap:none
INCLUDE \masm32\include\windows.inc
INCLUDE \masm32\include\kernel32.inc
INCLUDELIB \masm32\lib\kernel32.lib
INCLUDE \masm32\include\user32.inc
INCLUDELIB \masm32\lib\user32.lib
.data
mystringdata db "hello cruel world", 0
buffer db 20 DUP(0)
.code
; -------------------------------------------------------------------------
; Program entry: copies mystringdata into buffer back to front and shows
; the reversed text in a message box.
; buffer must hold at least lstrlen(mystringdata)+1 bytes; the size is not
; checked here.
; -------------------------------------------------------------------------
Start:
    invoke  lstrlen, OFFSET mystringdata
    mov     ecx, eax                    ; ecx = characters still to copy
    mov     esi, OFFSET mystringdata
    mov     edi, OFFSET buffer
    test    ecx, ecx
    jz      terminate                   ; empty string: nothing to copy
@@:
    dec     ecx                         ; ecx = index of next source char
    mov     dl, BYTE ptr [esi+ecx]
    mov     BYTE ptr [edi], dl
    inc     edi
    test    ecx, ecx
    jnz     @B                          ; index 0 was just copied -> done
terminate:
    mov     BYTE ptr [edi], 0           ; do not rely on buffer being zeroed
    invoke  MessageBox, 0, OFFSET buffer, 0, 0
    invoke  ExitProcess, 0
END Start
Reverses string until the center character then reverses up the string again.
Output: dlrow leuel world
.386
.MODEL flat, stdcall
option casemap:none
INCLUDE \masm32\include\windows.inc
INCLUDE \masm32\include\kernel32.inc
INCLUDELIB \masm32\lib\kernel32.lib
INCLUDE \masm32\include\user32.inc
INCLUDELIB \masm32\lib\user32.lib
.data
mystringdata db "hello cruel world", 0
.code
; -------------------------------------------------------------------------
; Program entry: "mirrors" mystringdata in place and shows it in a message
; box. Source and destination are the same buffer, so once the read cursor
; passes the middle it picks up bytes that were already overwritten; the
; second half of the string is therefore restored rather than reversed.
; -------------------------------------------------------------------------
Start:
    invoke  lstrlen, OFFSET mystringdata
    mov     edi, OFFSET mystringdata    ; write cursor, moves forward
    lea     esi, [edi+eax]              ; read cursor, starts past last char
mirror_loop:
    dec     esi
    mov     dl, BYTE ptr [esi]
    mov     BYTE ptr [edi], dl
    inc     edi
    cmp     esi, OFFSET mystringdata
    jne     mirror_loop                 ; stop once the first byte was read
    mov     BYTE ptr [edi], 0           ; re-terminate the string
    invoke  MessageBox, 0, OFFSET mystringdata, 0, 0
    invoke  ExitProcess, 0
END Start
You can remove that mov ecx, eax and change some parts of the code. I love to use ecx in this way :grin:
umberg6007 ,
Don't be so hasty! I need to reverse strings all the time. So I'll definitely have a look at your routine for any new tricks.
DNA, as you may already know, has two strands that are paired up in a way so that you only have to describe one & the other is the "reverse complement" which you can calculate quite easily by reversing the string then swapping a<-->t & c<-->g. Suffice it to say that there are many situations where it's quicker to actually do this calculation up front than to fudge it on the fly.
Don't be so hasty! I need to reverse strings all the time. So I'll definitely have a look at your routine for any new tricks.
DNA, as you may already know, has two strands that are paired up in a way so that you only have to describe one & the other is the "reverse complement" which you can calculate quite easily by reversing the string then swapping a<-->t & c<-->g. Suffice it to say that there are many situations where it's quicker to actually do this calculation up front than to fudge it on the fly.
rafe,
DNA sounds like a lot of fun in the string parsing area, tons of room for creative thinking in byte twiddling.
Regards,
hutch@movsd.com
DNA sounds like a lot of fun in the string parsing area, tons of room for creative thinking in byte twiddling.
Regards,
hutch@movsd.com
DNA? I didn't know that it's the same at the opposite end starting at the center, must have slipped my mind. Time to dig up my biology book :grin: he! he! he!
truth is I'm over my head & drowning & loving it all the same ... But the boss likes the work so far & I keep the post-docs in line... mostly. I only had one formal programming course in my life & I'm already having to deal with big-O, wacked algos on strings & trees on a daily basis... data mining (what me gloat?)
String stuff is a must in bio-comp but the field is rapidly moving to Diff eq systems to model the molecules. :grin: more to learn
Unfortunately, I've got little time to actually code in asm but the creative stuff & learning is fun. Work's a handful & the current home situation doesn't allow me much down time right now.
hutch--, Oh yea, that string search variant I was rambling on about a few weeks back is already in the lit... no IP issues. more later in another thread.
mmm.... not from the center oops ! caught me skimming. humble apologies. but restriction enzymes look for....
String stuff is a must in bio-comp but the field is rapidly moving to Diff eq systems to model the molecules. :grin: more to learn
Unfortunately, I've got little time to actually code in asm but the creative stuff & learning is fun. Work's a handful & the current home situation doesn't allow me much down time right now.
hutch--, Oh yea, that string search variant I was rambling on about a few weeks back is already in the lit... no IP issues. more later in another thread.
mmm.... not from the center oops ! caught me skimming. humble apologies. but restriction enzymes look for....