GetStr proc szSource:dword, szDestination:dword, szParameter:dword, szDelimiter:byte
push esi
push edi
push ebx
mov esi, szSource
mov edi, szDestination
xor ebx, ebx
@@Retrieve:
mov al,
inc esi
test al, al
jz @@Retrieved
mov , al
inc edi
cmp al, szDelimiter
jnz @@Retrieve
@@Retrieved:
inc ebx
cmp ebx, szParameter
jz @@Retrieved2
mov edi, szDestination
mov ecx, 1024
mov al, 0
rep stosb
mov edi, szDestination
jmp @@Retrieve
@@Retrieved2:
mov byte ptr , 0
pop esi
pop edi
pop ebx
ret
GetStr endp
As always, optimizations are welcome :)
push esi
push edi
push ebx
mov esi, szSource
mov edi, szDestination
xor ebx, ebx
@@Retrieve:
mov al,
inc esi
test al, al
jz @@Retrieved
mov , al
inc edi
cmp al, szDelimiter
jnz @@Retrieve
@@Retrieved:
inc ebx
cmp ebx, szParameter
jz @@Retrieved2
mov edi, szDestination
mov ecx, 1024
mov al, 0
rep stosb
mov edi, szDestination
jmp @@Retrieve
@@Retrieved2:
mov byte ptr , 0
pop esi
pop edi
pop ebx
ret
GetStr endp
As always, optimizations are welcome :)
You are poping wrong values.You should do in this way
pop ebx
pop edi
pop esi
or better use use keyword like
GetStr proc uses edi esi ebx szSource:dword, szDestination:dword, szParameter:dword, szDelimiter:byte
..
ret
GetStr endp
if you use use keyword you dont need to push and pop values to save registers.
pop ebx
pop edi
pop esi
or better use use keyword like
GetStr proc uses edi esi ebx szSource:dword, szDestination:dword, szParameter:dword, szDelimiter:byte
..
ret
GetStr endp
if you use use keyword you dont need to push and pop values to save registers.
Could you explain please what you code is doing?
Especially what is in parameter, and what this magic
code is doing?
inc ebx
cmp ebx, szParameter
jz @@Retrieved2
mov edi, szDestination
mov ecx, 1024 ;why 1024 ?
mov al, 0 ;why did you zero part that you just filled?
rep stosb
mov edi, szDestination
jmp @@Retrieve
You want to retrive string after delemeter specified in Parameter?
Then find address of needed delemeter first and only then copy part
untill nexe delemetor or zero. Why those needless copy - zeroing?
The code can be optimized of course but before it the subject of the task
needed to be clear. Most powerfull optimization is optimization made in
our heads while we researching subject of task in depth before first
carachter typed in code source.
Especially what is in parameter, and what this magic
code is doing?
inc ebx
cmp ebx, szParameter
jz @@Retrieved2
mov edi, szDestination
mov ecx, 1024 ;why 1024 ?
mov al, 0 ;why did you zero part that you just filled?
rep stosb
mov edi, szDestination
jmp @@Retrieve
You want to retrive string after delemeter specified in Parameter?
Then find address of needed delemeter first and only then copy part
untill nexe delemetor or zero. Why those needless copy - zeroing?
The code can be optimized of course but before it the subject of the task
needed to be clear. Most powerfull optimization is optimization made in
our heads while we researching subject of task in depth before first
carachter typed in code source.
LaptoniC, thanks for pointing that out, i coded this a few days ago at 3 in the morning heh, anyway Svin, 1024 was the size of the buffer i had....so just change that to sizeof szDestination as for the mov al, 0 im not quite sure what you mean, but all it does if its the correct parameter to get, it puts null's in the buffer to get rid of the stuff already placed there.
Svin, what I think the code is doing is this:
There is some string in Source which is null or "Delimiter" delimited (say "," or such).
The proc seems to copy the nth section (as determined by nulls & delimiters) to the destination parameter (which seems to be a 1k buffer).
Source - is some null terminated string with a delimiter in it....
Destination - is a 1k buffer.
Parameter - is some count to select the 'n'th delimited item from source.
Delimiter - is the single byte character used as the delimiter.
The current code copys then determines whether or not it has reached the appropriate delimter to terminate... This is slow, as you will be memory to memory copying data that will ultimately be overwritten. Instead we can only do the copy at the appropriate time by decrementing the parameter count at the start, and addind in a little jump at the begining.
As an optimisation it is more speed related than code size :)
Also I shuffled registers to avoid using ebx, esi, and edi!
I think this should do the same job...
Of course you could probably neaten things up a bit, this was a quick glance at the problem.
Mirno
There is some string in Source which is null or "Delimiter" delimited (say "," or such).
The proc seems to copy the nth section (as determined by nulls & delimiters) to the destination parameter (which seems to be a 1k buffer).
Source - is some null terminated string with a delimiter in it....
Destination - is a 1k buffer.
Parameter - is some count to select the 'n'th delimited item from source.
Delimiter - is the single byte character used as the delimiter.
The current code copys then determines whether or not it has reached the appropriate delimter to terminate... This is slow, as you will be memory to memory copying data that will ultimately be overwritten. Instead we can only do the copy at the appropriate time by decrementing the parameter count at the start, and addind in a little jump at the begining.
As an optimisation it is more speed related than code size :)
Also I shuffled registers to avoid using ebx, esi, and edi!
I think this should do the same job...
dec Parameter ; For compatability reasons
mov edx, Source
xor ecx, ecx
cmp ecx, Parameter ; Do we want the first thing?....
je The_Copy
mov ah, Delimiter
Delimter_Search:
mov al, [edx]
inc edx
test al, al ; Test for the null terminator
jz The_Copy
cmp al, ah ; Test for the delimiter
jne Delimiter_Search
The_Copy:
inc ecx
cmp ecx, Parameter ; Is this the correct "item" ?
jne Delimiter_Search
mov ecx, Destination ; Count is no longer needed
mov al, [edx]
mov [ecx], al
inc edx
inc ecx
test al, al
jz The_End
cmp al, ah
jne The_Copy
The_End:
mov [ecx - 1], 0 ; It could be "delimiter", so
; lets be sure!
ret
Of course you could probably neaten things up a bit, this was a quick glance at the problem.
Mirno
1. If your proc format output in ASCIIZ then why you need to get read of anything?
2. Why first put it into destination and only then check if you put
the right thing? I'm asking it for the second time -
why can't you find address of right parameter and only then
copy to destination?
2. Why first put it into destination and only then check if you put
the right thing? I'm asking it for the second time -
why can't you find address of right parameter and only then
copy to destination?
The original code is definitely sub-optimal, my first attempt did what the Svin suggested, and only starts the copy to Destination when it is appropriate.
I did however realise that I had made a little mistake in my parameter selection (a mistake, and a missed optimisation)!
Here is some new code:
That should work better, in cases where Parameter was 1 it would most likely crash as it would be selecting the 4294967296th item from the list rather than the first! The rest should have been OK though.
Mirno
I did however realise that I had made a little mistake in my parameter selection (a mistake, and a missed optimisation)!
Here is some new code:
mov ah, Delimiter
mov edx, Source
mov ecx, Parameter
jmp The_Copy ; Do we want the first thing?....
Delimter_Search:
mov al, [edx]
inc edx
test al, al ; Test for the null terminator
jz The_Copy
cmp al, ah ; Test for the delimiter
jne Delimiter_Search
The_Copy:
dec ecx ; Is this the correct "item" ?
jne Delimiter_Search
mov ecx, Destination ; Count is no longer needed
mov al, [edx]
mov [ecx], al
inc edx
inc ecx
test al, al
jz The_End
cmp al, ah
jne The_Copy
The_End:
mov [ecx - 1], 0 ; It could be "delimiter", so
; lets be sure!
ret
That should work better, in cases where Parameter was 1 it would most likely crash as it would be selecting the 4294967296th item from the list rather than the first! The rest should have been OK though.
Mirno
Thank, Mirno.
I finally get it.
High level logic must be this then:
1. Source = Source start
Check if Param = 1
Yes - Go to 3.
2. Search for delemeter # = Param-1
not found - Go to 4
Source = addr of delemetr # = Param-1
3. Copy string from source to dest untill next byte is delemeter or zero
ret
4. Return Error
I finally get it.
High level logic must be this then:
1. Source = Source start
Check if Param = 1
Yes - Go to 3.
2. Search for delemeter # = Param-1
not found - Go to 4
Source = addr of delemetr # = Param-1
3. Copy string from source to dest untill next byte is delemeter or zero
ret
4. Return Error
This is attempt to rather not solution but show process how
algos and procs are created.
I need to know if anybody need such kind of post, or I'd better stop.
-----------------------------------------------------------------------------------------------------------
When Hutch was working on BM algo he wrote noticable words:
"Now I have all nessesary data to write BM algo"
This phrase is sign of real programmer, not just asm programmer but any real programmer.
Code just symbols we put our thoughts and knowlige in.
Before it the knowlige must be aquered and thoghts must come.
There is simple example how one can think step by step about subj proc:
First we create logic algos (as we would do for alien computer that
would can understand our words):
1. Source = Source start
Check if Param = 1
Yes - Go to 3.
2. Search for delemeter # = Param-1
not found - Go to 4
Source = addr of delemetr # = Param-1
3. Copy string from source to dest untill next byte is delemeter or zero
ret
4. Return Error
[1]. Finding the right place to copy from.
In the stage we can't know yet that:
- If there is param part in Source we searching for
- Delemeter show us only end of previous part.
This last condition is usual thing in programming and can be solved using
math logic
if n delemeter is end of part X_n and start of part X_n+1
then n-1 delemeter is start of part X_n
So should set counter for delemeter as Param-1.
The setting itselft give us greate opportunity not only set delemeter to right value
but also check condition in step 1. and possibility
that thecaller by mistake send 0 as Param:
mov ecx,Param
dec ecx ;ecx = Param-1 and if Param = 1 ecx = 0 elseif Param = 0 ecx < 0
je @copy ;Param is 1 - we can copy right now
js @error ;Param is zero - jump to @error
The other thing to keep in mind that delemeters in ASCIIZ is actually show start on next part
and if there is no "next part" 0 becomes a delemeter or terminator.
Because for us end of string should be either start the next part or end of the string
we treat both delemeter and zero as terminators.
Delemetors usually has less ASCII code then most of contents of string that means
that we can filter out 9x% of "not zero" byte by just one cmp both for delemeter and
zero, yet we need to do additional check if "zero probability" found
cmp byte,delemeter
je @delemeterfound
jnc @again ;not zero
cmp byte,1 ;yet may be byte just less then delemeter? set cary if zero
;this code will run very rare by mistake
jc @error
....
@error: sbb eax,eax ; -1 return if error
now 'cause if a = b then a-b = 0 we can substruct 1 from the counter each time we
found a delemeter untill it 0 cause counter set to Param-1 we'll find Delemetor
wich is start of Part
@again: mov al,
inc esi
cmp al,delemeter
je @delemeterfound ;less prob
jnc @again ;high prob taken
cmp al,1
jnc @again ; lowest prob guessed as not taken
@error: sub eax,eax
ret
@delemetorfound:
dec ecx
jne @again ;esi pointed to next byte.
;send bytes to edi until esi <> 0 and <> delemeter
@copy:
I thought it was important to show process how some proc is created, while
doing research on subject of task and analyzing possible solutions.
I can not be 100% sure that anybody need such kind of work, so let me
know please - shall I continue with this analyze, or it's worthless?
algos and procs are created.
I need to know if anybody need such kind of post, or I'd better stop.
-----------------------------------------------------------------------------------------------------------
When Hutch was working on BM algo he wrote noticable words:
"Now I have all nessesary data to write BM algo"
This phrase is sign of real programmer, not just asm programmer but any real programmer.
Code just symbols we put our thoughts and knowlige in.
Before it the knowlige must be aquered and thoghts must come.
There is simple example how one can think step by step about subj proc:
First we create logic algos (as we would do for alien computer that
would can understand our words):
1. Source = Source start
Check if Param = 1
Yes - Go to 3.
2. Search for delemeter # = Param-1
not found - Go to 4
Source = addr of delemetr # = Param-1
3. Copy string from source to dest untill next byte is delemeter or zero
ret
4. Return Error
[1]. Finding the right place to copy from.
In the stage we can't know yet that:
- If there is param part in Source we searching for
- Delemeter show us only end of previous part.
This last condition is usual thing in programming and can be solved using
math logic
if n delemeter is end of part X_n and start of part X_n+1
then n-1 delemeter is start of part X_n
So should set counter for delemeter as Param-1.
The setting itselft give us greate opportunity not only set delemeter to right value
but also check condition in step 1. and possibility
that thecaller by mistake send 0 as Param:
mov ecx,Param
dec ecx ;ecx = Param-1 and if Param = 1 ecx = 0 elseif Param = 0 ecx < 0
je @copy ;Param is 1 - we can copy right now
js @error ;Param is zero - jump to @error
The other thing to keep in mind that delemeters in ASCIIZ is actually show start on next part
and if there is no "next part" 0 becomes a delemeter or terminator.
Because for us end of string should be either start the next part or end of the string
we treat both delemeter and zero as terminators.
Delemetors usually has less ASCII code then most of contents of string that means
that we can filter out 9x% of "not zero" byte by just one cmp both for delemeter and
zero, yet we need to do additional check if "zero probability" found
cmp byte,delemeter
je @delemeterfound
jnc @again ;not zero
cmp byte,1 ;yet may be byte just less then delemeter? set cary if zero
;this code will run very rare by mistake
jc @error
....
@error: sbb eax,eax ; -1 return if error
now 'cause if a = b then a-b = 0 we can substruct 1 from the counter each time we
found a delemeter untill it 0 cause counter set to Param-1 we'll find Delemetor
wich is start of Part
@again: mov al,
inc esi
cmp al,delemeter
je @delemeterfound ;less prob
jnc @again ;high prob taken
cmp al,1
jnc @again ; lowest prob guessed as not taken
@error: sub eax,eax
ret
@delemetorfound:
dec ecx
jne @again ;esi pointed to next byte.
;send bytes to edi until esi <> 0 and <> delemeter
@copy:
I thought it was important to show process how some proc is created, while
doing research on subject of task and analyzing possible solutions.
I can not be 100% sure that anybody need such kind of work, so let me
know please - shall I continue with this analyze, or it's worthless?
Here is my crappy version (didn't check it written in paper)
should be faster is there are not many spaces:
should be faster is there are not many spaces:
ret Value: Success eax = 0
Error eax =-1
GetArg proc uses esi lpSource,lpDest,Param,delim
mov esi,lpSource
mov ecx,Param
mov edx,lpDest
sub ecx,1
mov ah,delim
je @Copy
jc @Error
@@: mov al,[esi]
inc esi
cmp al,ah
je @found
jns @B
cmp al,1
jnc @B
jmp @Error
dec ecx
@found: jne @B
@Copy: mov al,[esi][ecx]
inc ecx
mov [edx][ecx-1],al
cmp al,ah ;delimiter ?
je @putzero
jnc @Copy
test al,al
je @Error
jmp @Copy
@putzero:
mov byte ptr [edx][ecx-1],0
@Error: sbb eax,eax
ret
GetArg endp
Necessity of such procedure doubtfully. The procedure will reduce productivity of operation of the program which uses this procedure. I offer to consider the following units:
1. Usage for linear search of parameters.
More successful organization will be as two procedures: "GetFirstParameter", "GetNextParameter". Here there is no repeated scanning string.
2. Usage of a random access to parameters. Single scanning of string with fixing the beginning of strings in array ("ScanParameters"). "GetParameter" extracts the necessary string under number from the array of the beginnings of strings.
3. What for to duplicate parameters? I use methods of variation of string of parameters.
"param1,"
"param2,"
"param3", 0
vvv
"param1", 0
"param2", 0
"param3", 0
Changeover of a separator by null is fulfilled. Procedures "Get???Parameter" transmits the pointer by the beginning of the parameter to the initial string.
Combination of these units considerably will boost rate of machining and common productivity of the program.
1. Usage for linear search of parameters.
More successful organization will be as two procedures: "GetFirstParameter", "GetNextParameter". Here there is no repeated scanning string.
2. Usage of a random access to parameters. Single scanning of string with fixing the beginning of strings in array ("ScanParameters"). "GetParameter" extracts the necessary string under number from the array of the beginnings of strings.
3. What for to duplicate parameters? I use methods of variation of string of parameters.
"param1,"
"param2,"
"param3", 0
vvv
"param1", 0
"param2", 0
"param3", 0
Changeover of a separator by null is fulfilled. Procedures "Get???Parameter" transmits the pointer by the beginning of the parameter to the initial string.
Combination of these units considerably will boost rate of machining and common productivity of the program.
You are right, Nexo. But it is another topic.
Here I want to help the boy to achive what he wants by more
effective way.
As to parsing I think some programmers here use those parsings not for params in it's usual hence but for kinda database tasks
(why did he need such a long buffer for example?).
Which of course not good way to design, but as I said - it's another topic.
-----------------
To the proc - a little changes with assumed probability.
Here I want to help the boy to achive what he wants by more
effective way.
As to parsing I think some programmers here use those parsings not for params in it's usual hence but for kinda database tasks
(why did he need such a long buffer for example?).
Which of course not good way to design, but as I said - it's another topic.
-----------------
To the proc - a little changes with assumed probability.
;ret Value: Success - eax = 0
; Error eax =-1
invoke GetArg,offset Source,offset dest,2,","
invoke MessageBox,0,offset dest,0,0
call ExitProcess
GetArg proc uses esi lpSource,lpDest,Param,delim:BYTE
mov esi,lpSource
mov ecx,Param
mov edx,lpDest
sub ecx,1
mov ah,delim
je @Copy
jc @Error
@@: mov al,[esi]
inc esi
cmp al,ah
ja @B ;high prob
je @found ;rare prob
cmp al,1
jnc @B ;low prob
jmp @Error ;50\50
@found: dec ecx
jne @B
@Copy: mov al,[esi][ecx]
inc ecx
mov [edx][ecx-1],al
cmp al,ah ;delimiter ?
ja @Copy ;high proc
je @putzero ;only once
cmp al,1
jnc @Copy ;low proc
jmp @Error ;only once
@putzero:
mov byte ptr [edx][ecx-1],0
@Error: sbb eax,eax
ret
GetArg endp
Maybe someone do it faster
Here is my optimized method
BTW, there is no checking of bufout overflow
Here is my optimized method
.data
mess db "This is a string parsing test",0
.data?
bufout db 1024 dup(?)
.code
start:
mov edi, offset mess
mov edx, 32 ;delimiter
mov ebx, 7 ;number of string
call strlen
call GetStr
invoke MessageBox, 0, addr bufout, addr bufout, MB_OK
invoke ExitProcess,0
GetStr proc uses edx esi edi ebx
mov eax, edx
test ebx, ebx
jz @@first
@@next:
repne scasb
dec ebx
test ebx, ebx
jnz @@next
@@first:
push edi
mov edx, ecx
repne scasb
sub edx, ecx
mov ecx, edx
pop esi
mov edi, offset bufout
rep movsb
ret
GetStr endp
strlen proc uses edi
xor al, al
xor ecx, ecx
dec ecx
repnz scasb
not ecx
dec ecx
ret
strlen endp
end start
BTW, there is no checking of bufout overflow
Optimized for size? :)
'Cause it was obviously not speed but size optimization,
I didn't change anything that increasing speed would increase
size also.
One more thing - you code works only 'cause in a start there are
usualy zeros in .data? section - you need to add zero termination
otherwize problems could arise.
I didn't change anything that increasing speed would increase
size also.
One more thing - you code works only 'cause in a start there are
usualy zeros in .data? section - you need to add zero termination
otherwize problems could arise.
.data
mess db "This is a string parsing test",0
.data?
bufout db 1024 dup(?)
.code
start: mov edi, offset mess
; mov edx, 32 ;delimiter
mov ebx, 0 ;number of string
call strlen
mov al,32
call GetStr
invoke MessageBox, 0, addr bufout, addr bufout, MB_OK
invoke ExitProcess,0
GetStr proc uses edx esi edi ebx
; mov eax, edx
test ebx, ebx
jz @@first
@@next:
repne scasb
dec ebx
; test ebx, ebx - you don't need it dec already sets ZF
jnz @@next
@@first: push edi
mov edx, ecx
repne scasb
; sub edx, ecx
; mov ecx, edx
sub ecx,edx ;the same but paralelled
pop esi
neg ecx
mov edi, offset bufout
rep movsb
ret
GetStr endp
strlen proc uses edi
xor eax, eax
lea ecx,[eax-1] ;the same 3 bytes
; xor ecx, ecx
; dec ecx ;? mov ecx,-1
repnz scasb
not ecx
dec ecx
ret
strlen endp
OK, I'm agree with almost all your claims, Svin. Not with all, for example:
I think following code will be faster on proc higher than 386
And about speed and size. As I see your algo is operating with 16-bit registers, I dont think it is a fast way.
BTW, interesting problem arise if delimiter contain more than one byte
mov al, 32
I think following code will be faster on proc higher than 386
mov eax, 32
And about speed and size. As I see your algo is operating with 16-bit registers, I dont think it is a fast way.
BTW, interesting problem arise if delimiter contain more than one byte
There is no 16 bit regs in my algo. Only 32 and 8 bit.
8 bit regs is OK both for 16 and 32 bit mode (no additional prefix
no additional clock)
As to speed -
Test!
8 bit regs is OK both for 16 and 32 bit mode (no additional prefix
no additional clock)
As to speed -
Test!