Here is a function I have been working on and am almost proud of; it hopefully mimics the CRTDLL strtok function. Let me know what you think: criticism or alterations are welcome. It also takes care of any extra (repeated) delimiter characters within the string. It manipulates 5 registers.





StrTok PROC uses ebx edi bStr:DWORD, pStr:DWORD
;-----------------------------------------------------------------------
; StrTok - copy the next field of bStr into the static buffer bRet.
;
; In:    bStr = current position in the input string; the string ends at
;               a NUL, a CR (13) or an LF (10)
;        pStr = NUL-terminated list of single-byte delimiter characters
; Out:   bRet = the field found, always NUL-terminated (empty if none)
;        eax  = 0 if no field was found before the end of the line/string,
;               otherwise the address of the terminating NUL inside bRet
;               (field length = eax - OFFSET bRet)
;        edx  = address where scanning stopped; pass it back as bStr on
;               the next call to continue parsing the same line
; Clobb: eax, ecx, edx, flags.  ebx and edi are preserved (USES), as the
;        Win32 stdcall convention requires.
; Notes: bRet is defined in .data (see the initialization section) and
;        must be declared before this procedure so SIZEOF can resolve it.
;        Fields longer than SIZEOF bRet - 1 bytes are truncated rather
;        than written past the end of the buffer.
;-----------------------------------------------------------------------

        mov     edx, bStr               ; edx = read cursor in the input
        mov     edi, OFFSET bRet        ; edi = write cursor in bRet
        xor     eax, eax                ; al  = current input character
        xor     ecx, ecx                ; cl  = current delimiter, ch = 1 once a field has started

scan_input:
        mov     al, [edx]
        test    al, al
        jz      finish                  ; end of input string
        cmp     al, 13
        je      finish                  ; CR ends the line
        cmp     al, 10
        je      finish                  ; so does a lone LF

        mov     ebx, pStr               ; ebx walks the delimiter list

match_delim:
        mov     cl, [ebx]
        cmp     al, cl
        je      is_delim                ; current character is a delimiter
        test    cl, cl
        jz      is_text                 ; list exhausted: ordinary character
        inc     ebx
        jmp     match_delim

is_delim:
        cmp     ch, 1
        je      finish                  ; delimiter after a field: field is complete
        inc     edx                     ; leading delimiter: skip it
        jmp     scan_input

is_text:
        mov     ch, 1                   ; a field has started
        inc     edx
        cmp     edi, OFFSET bRet + (SIZEOF bRet - 1)
        jae     scan_input              ; buffer full: drop the byte, keep room for the NUL
        mov     BYTE PTR [edi], al
        inc     edi
        jmp     scan_input

finish:
        mov     BYTE PTR [edi], 0       ; terminate bRet on every exit path
        xor     eax, eax                ; assume no field was found
        cmp     ch, 1
        jne     done
        mov     eax, edi                ; success: address of the terminator in bRet
done:
        ret

StrTok endp


Using initialization:



.data
; Sample data for the StrTok test below.
fmtTxt db 80 DUP(?) ;wsprintf output buffer - sized like bRet so a full-length field cannot overflow it
alf2 db "%s",0 ;wsprintf format string
bRet db 80 DUP(0) ;static buffer StrTok returns each field in
fspc db 'f /',0 ;The tokens to separate the text out with
FOO db 'f 256//16 15//231 34//151',13,10,0 ;sample input line, CRLF + NUL terminated


To test:



; Extract and display the first two fields of FOO.
; edx carries the parse position from one StrTok call to the next, so it has
; to survive the API calls in between - hence the pushad/popad pairs.
        mov     edx, OFFSET FOO

; --- first field ---
        invoke  StrTok, edx, OFFSET fspc

        pushad
        invoke  wsprintf, OFFSET fmtTxt, OFFSET alf2, OFFSET bRet
        invoke  MessageBox, NULL, OFFSET fmtTxt, OFFSET fmtTxt, MB_OK
        popad

; --- second field, continuing from where the first call stopped ---
        invoke  StrTok, edx, OFFSET fspc

        pushad
        invoke  wsprintf, OFFSET fmtTxt, OFFSET alf2, OFFSET bRet
        invoke  MessageBox, NULL, OFFSET fmtTxt, OFFSET fmtTxt, MB_OK
        popad



I did run this over an entire multi-line string loaded into memory; of course, I had to find the start of the string on each line before parsing it, otherwise the loop would have been inefficient.
Posted on 2004-01-21 16:39:23 by drarem
StrTok v0.2


- cleans up the global 'return' string, eg the Back: loop
- returns '1' if successful in finding a token and returning a string



StrTok PROC uses ebx edi bStr:DWORD, pStr:DWORD
;-----------------------------------------------------------------------
; StrTok v0.2 - copy the next field of bStr into the static buffer bRet.
;
; In:    bStr = string to be parsed, such as: db "C:\current_directory\filename1.mph",0
;               (parsing stops at a NUL, a CR (13) or an LF (10))
;        pStr = NUL-terminated delimiter characters, such as: db "\.",0
; Out:   bRet = the field found; the whole buffer is zeroed first, so it
;               is empty when no field was found
;        eax  = 1 if a field was copied into bRet,
;               0 if the end of the line/string was reached without one
;        edx  = address where scanning stopped; preserve it between calls
;               and pass it back as bStr to keep parsing the same line
; Clobb: eax, ecx, edx, flags.  ebx and edi are preserved (USES); esi is
;        no longer touched.
; Notes: bRet is your own buffer, e.g. (.data) bRet db 80 dup(0); it must
;        be declared before this procedure so SIZEOF can resolve it.
;        Fields longer than SIZEOF bRet - 1 bytes are truncated.
;        The direction flag is assumed clear on entry (Win32 convention).
;-----------------------------------------------------------------------

; Zero exactly SIZEOF bRet bytes; the previous hand-written loop ran one
; iteration too many and wrote a byte past the end of the buffer.
        mov     edi, OFFSET bRet
        mov     ecx, SIZEOF bRet
        xor     eax, eax
        rep     stosb

        mov     edx, bStr               ; edx = read cursor in the input
        mov     edi, OFFSET bRet        ; edi = write cursor in bRet
        xor     ecx, ecx                ; cl  = current delimiter, ch = 1 once a field has started
                                        ; eax is still 0; al holds the current character

scan_input:
        mov     al, [edx]
        test    al, al
        jz      finish                  ; end of input string
        cmp     al, 13
        je      finish                  ; CR ends the line
        cmp     al, 10
        je      finish                  ; so does a lone LF

        mov     ebx, pStr               ; ebx walks the delimiter list

match_delim:
        mov     cl, [ebx]
        cmp     al, cl
        je      is_delim                ; current character is a delimiter
        test    cl, cl
        jz      is_text                 ; list exhausted: ordinary character
        inc     ebx
        jmp     match_delim

is_delim:
        cmp     ch, 1
        je      finish                  ; delimiter after a field: field is complete
        inc     edx                     ; leading delimiter: skip it
        jmp     scan_input

is_text:
        mov     ch, 1                   ; a field has started
        inc     edx
        cmp     edi, OFFSET bRet + (SIZEOF bRet - 1)
        jae     scan_input              ; buffer full: drop the byte, keep room for the NUL
        mov     BYTE PTR [edi], al
        inc     edi
        jmp     scan_input

finish:
        mov     BYTE PTR [edi], 0       ; null-terminate the string
        xor     eax, eax                ; assume no field was found
        cmp     ch, 1
        jne     done
        mov     eax, 1                  ; success
done:
        ret

StrTok endp
Posted on 2004-01-27 08:00:21 by drarem
haven't really looked at or tested your code (I probably wouldn't use strtok if I need high-speed string parsing anyway), but one thing that comes to mind: you should really preserve ebx,esi,edi , or you might end up in trouble.
Posted on 2004-01-27 10:52:32 by f0dder
Wow,

That looks like a pretty nice routine. I have my own StrToken routine that is much simpler, but it only handles 1-byte tokens. I find single-byte tokens are all I need, as mostly I am searching for \ or , . This works exactly like the MSVCRT strtok function. The first time you call it you give it the address of the string; on each subsequent call you pass NULL instead of the string address. It replaces the token with NULL, so it destroys the string, but I have never had a situation where I needed to keep the string after parsing anyway...

; Example: fetch three comma-separated fields from TestStr, one per call.
; Note in GoAsm the "," will generate an OFFSET to a string

; containing ",",0. It is equivalent to CTEXT() or whatever it is in MASM.
; PrintStringByAddr is a macro defined elsewhere - presumably it displays
; the string whose address is in eax (confirm against its definition).

invoke StrToken,OFFSET TestStr,"," ; first call: pass the address of the string
PrintStringByAddr(eax)
invoke StrToken,NULL,"," ; NULL = continue from the position saved by the previous call
PrintStringByAddr(eax)
invoke StrToken,NULL,","
PrintStringByAddr(eax)

.data

NextToken DD ?                  ; address where the next call resumes scanning
LenToken DD ?                   ; number of bytes still unscanned in the string
.code
;-----------------------------------------------------------------------
; StrToken - split a string in place on a single-byte delimiter.
;
; In:    pString = address of a NUL-terminated string to start parsing,
;                  or NULL to continue with the string from the last call
;        pToken  = address of the delimiter; only its first byte is used
; Out:   eax     = address of the current token.  The delimiter that ends
;                  it is overwritten with 0, so the source string is
;                  modified.  Once the string is exhausted, eax points at
;                  an empty string.
; Clobb: eax, flags (edi, ebx, ecx are saved by USES)
; Notes: the parse position is kept in the globals NextToken/LenToken, so
;        only one string can be parsed at a time.  The first call must
;        pass a non-NULL pString, otherwise NextToken is uninitialized.
;-----------------------------------------------------------------------
StrToken FRAME pString,pToken
uses edi,ebx,ecx

mov eax,[pToken]
movzx ebx,B[eax]                ; bl = delimiter byte (kept in ebx across the StrLen call)

mov edi,[pString]
or edi,edi
jz >                            ; NULL: resume from the saved position
mov [NextToken],edi             ; new string: remember its start and length
invoke StrLen,edi
mov [LenToken],eax
jmp >L1
:
mov edi,[NextToken]
L1:
mov ecx,[LenToken]
test ecx,ecx
jz >L2                          ; nothing left to scan: repne would not run, and
                                ; [edi-1] must not be inspected or zeroed
mov al,bl
repne scasb                     ; advance edi past the next delimiter (or to the end)

cmp B[edi-1],al                 ; did the scan stop on a delimiter?
jne >L2
mov B[edi-1],0                  ; yes: terminate the token there
L2:
mov eax,[NextToken]             ; return the start of this token
mov [NextToken],edi             ; save the resume position for the next call
mov [LenToken],ecx              ; and the count of bytes still to scan
RET
ENDF
Posted on 2004-02-02 15:08:25 by donkey