"I am wondering if someone could produce one version which is faster.." by roticv


;Usage:
; push 0 ; always 0 !!!
; push offset szString2 ; address of second string
; push offset szString1 ; address of first string
; push offset szTargetBuffer ; address of target buffer
; call CatStrA ;
; or ;
; invoke CatStrA, addr szTargetBuffer, addr szString1, addr szString2, 0
;
; Note: If you want to use more Strings, for instance 4,
; just redefine the proc as:
; CatStrA proto lpTarget:DWORD, lpSource1:DWORD, lpSource2:DWORD,
; lpSource3:DWORD, lpSource4:DWORD,
; nZero:DWORD
; and modify:
; ret 2*4+2*4 to ret 2*4+4*4 ; !!!
; Now you can use it as:
; invoke CatStrA, addr szTargetBuffer, addr szString1,
; addr szString2, addr szString3, addr szString4, 0
;
; and next:
; if you have 2 Strings:
; invoke CatStrA, addr szTargetBuffer, addr szString1,
; addr szString2, 0, 0, 0
; if you have 3 Strings:
; invoke CatStrA, addr szTargetBuffer, addr szString1,
; addr szString2, addr szString3, 0, 0
;
; szTargetBuffer, szString1, szString2, szString3 and szString4
; should be aligned to 4 !!!
;
OPTION PROLOGUE:NONE                    ; no generated frame: arguments are read relative to esp
OPTION EPILOGUE:NONE                    ;
;-----------------------------------------------------------------------
; CatStrA - write lpSource1, lpSource2, ... one after another into
;           lpTarget, producing a single NUL-terminated string.
;
; Stack on entry (the routine pops 4 dwords on return):
;   [esp+ 4] lpTarget   destination buffer
;   [esp+ 8] lpSource1  first source string
;   [esp+12] lpSource2  second source string
;   [esp+16] nZero      must be 0; it ends the list of source pointers
;
; Registers inside the routine:
;   edx = write cursor in the target
;   edi = read cursor in the current source (the caller's edi is parked
;         in the lpTarget argument slot and reloaded before returning)
;   ecx = address of the next source-pointer slot on the stack
;
; Out:   eax = address of the terminating NUL written to the target
; Clobb: eax, ecx, edx, flags
;
; The dword loop always loads and stores four bytes at a time, so it can
; read up to 3 bytes past a source's NUL and write up to 3 bytes past the
; target's NUL; size and align the buffers accordingly.
;-----------------------------------------------------------------------
Align 16                                ; start the proc on a 16-byte boundary
CatStrA proc lpTarget:DWORD, lpSource1:DWORD, lpSource2:DWORD, nZero:DWORD
        mov     edx, [esp+1*4]          ; edx = lpTarget
        lea     ecx, [esp+3*4]          ; ecx -> stack slot holding lpSource2
        mov     [esp+1*4], edi          ; park caller's edi in the lpTarget slot
        mov     edi, [esp+2*4]          ; edi = lpSource1
LoopDwords:
        mov     eax, [edi]              ; fetch 4 source bytes
        add     edx, 4
        add     edi, 4
        mov     [edx-4], eax            ; store them in the target
        add     eax, 0FEFEFEFFh         ; same as subtracting 01010101h
        test    eax, 80808080h          ; no high bit set => none of the 4 bytes was zero
        je      LoopDwords              ; (author's figure: 3 clocks per 4 bytes)
        ; The quick test also fires for bytes above 80h, so confirm
        ; byte by byte where (and whether) the NUL really is.
        cmp     byte ptr [edi-4], 0
        je      M6_minus4
        cmp     byte ptr [edi-4+1], 0
        je      M6_minus3
        cmp     byte ptr [edi-4+2], 0
        je      M6_minus2
        cmp     byte ptr [edi-4+3], 0
        jne     LoopDwords              ; false alarm: keep copying
        dec     edx                     ; NUL was the 4th byte: back up onto it
GetNextStr:
        mov     edi, [ecx]              ; edi = next source pointer from the argument list
        add     ecx, 4                  ; advance to the following argument slot
        mov     eax, edx                ; eax = current end of target (returned if we exit)
        test    edi, edi
        je      ExitCatStr              ; a 0 pointer marks the end of the list
        and     eax, 3
        je      LoopDwords              ; target cursor is 4-aligned: dword copy
LoopByte:                               ; target cursor unaligned: copy this string bytewise
        mov     al, [edi]
        inc     edi
        mov     [edx], al
        add     edx, 1
        test    al, al
        jne     LoopByte                ; until the NUL has been copied
        dec     edx                     ; back up onto the NUL
        ; The original used "jno" as an always-taken jump here and below.
        ; If the pointer arithmetic crosses 80000000h the overflow flag is
        ; set and execution would fall into the next label. "jmp" has the
        ; same short encoding and is taken unconditionally.
        jmp     GetNextStr
        nop                             ; padding kept from the original layout
M6_minus4:                              ; NUL was byte 0 of the last dword
        add     edx, -4
        jmp     GetNextStr
M6_minus3:                              ; NUL was byte 1
        add     edx, -3
        jmp     GetNextStr
M6_minus2:                              ; NUL was byte 2
        add     edx, -2
        jmp     GetNextStr
        nop                             ; padding kept from the original layout
ExitCatStr:
        mov     edi, [esp+1*4]          ; restore caller's edi
        ret     2*4+2*4                 ; pops 4 dwords; change this if parameters are added
CatStrA endp
OPTION PROLOGUE:PROLOGUEDEF             ; turn the default prologue/epilogue back on
OPTION EPILOGUE:EPILOGUEDEF             ;


Regards,
Lingo
Posted on 2003-06-23 17:41:36 by lingo12
quality :alright:

perhaps it would be useful to have a varargs version, just by making it cdecl- is there any speed difference between cdecl and stdcall?
Posted on 2003-06-24 12:24:25 by stormix
Really cool lingo12! Do you have profile data as well?
I think it would be interesting to set up a test rig.
Posted on 2003-06-24 17:04:48 by Poimander
Thanks guys,
And my smallest version:


OPTION PROLOGUE:NONE                    ; no generated frame: arguments are read relative to esp
OPTION EPILOGUE:NONE                    ;
;-----------------------------------------------------------------------
; CatStrAS - size-optimised concatenation: copies each source string in
;            turn into lpTarget using lodsb/stosb.
;
; Stack on entry (the routine pops 4 dwords on return):
;   lpTarget, lpSource1, lpSource2, nZero (must be 0: ends the list)
;
; esi and edi are saved and restored; eax, ecx and flags are clobbered,
; and the direction flag is left cleared.
;-----------------------------------------------------------------------
CatStrAS proc lpTarget:DWORD, lpSource1:DWORD, lpSource2:DWORD, nZero:DWORD
        push    esi
        push    edi
        cld                             ; string instructions move upward
        lea     esi, [esp+3*4]          ; esi -> lpTarget slot (above 2 saved regs + return address)
        lodsd                           ; eax = lpTarget, esi -> lpSource1 slot
        xchg    edi, eax                ; edi = write cursor
LStack: lodsd                           ; eax = next source pointer, esi -> following slot
        xchg    ecx, eax                ; ecx = source pointer
        jecxz   ExitAS                  ; 0 pointer ends the list
        xchg    esi, ecx                ; esi = source, ecx = saved argument cursor
LoopAS: lodsb
        stosb
        test    al, al
        jnz     LoopAS                  ; copy up to and including the NUL
        dec     edi                     ; back up onto the NUL so the next string overwrites it
        mov     esi, ecx                ; resume walking the argument list
        ; Was "jno LStack": if dec edi wraps 80000000h -> 7FFFFFFFh the
        ; overflow flag is set and the remaining strings would be skipped.
        ; jmp short has the same length as jno.
        jmp     LStack
ExitAS: pop     edi
        pop     esi
        ret     2*4+2*4                 ; pops 4 dwords; change this if parameters are added
CatStrAS endp                           ; author's size figure: 30 bytes
OPTION PROLOGUE:PROLOGUEDEF             ; turn the default prologue/epilogue back on
OPTION EPILOGUE:EPILOGUEDEF             ;

Regards,
Lingo
Posted on 2003-06-24 18:17:55 by lingo12
stormix's suggestion is quite true.

It would be much better like the following and let the caller clear the stack themselves.


OPTION PROLOGUE:NONE                    ; no generated frame: arguments are read relative to esp
OPTION EPILOGUE:NONE                    ;
;-----------------------------------------------------------------------
; CatStrAS (variable-argument form) - copies every source string that
; follows lpTarget on the stack into lpTarget, one after another.
;
; The argument list must end with a 0 pointer. The routine returns with
; a plain retn, so the caller is responsible for removing the arguments
; from the stack.
;
; esi and edi are saved and restored; eax, ecx and flags are clobbered,
; and the direction flag is left cleared.
;-----------------------------------------------------------------------
CatStrAS proc lpTarget:DWORD, :VARARG
        push    esi
        push    edi
        cld                             ; string instructions move upward
        lea     esi, [esp+3*4]          ; esi -> lpTarget slot (above 2 saved regs + return address)
        lodsd                           ; eax = lpTarget, esi -> first source slot
        xchg    edi, eax                ; edi = write cursor
LStack: lodsd                           ; eax = next source pointer, esi -> following slot
        xchg    ecx, eax                ; ecx = source pointer
        jecxz   ExitAS                  ; 0 pointer ends the list
        xchg    esi, ecx                ; esi = source, ecx = saved argument cursor
LoopAS: lodsb
        stosb
        test    al, al
        jnz     LoopAS                  ; copy up to and including the NUL
        dec     edi                     ; back up onto the NUL so the next string overwrites it
        mov     esi, ecx                ; resume walking the argument list
        ; Was "jno LStack": if dec edi wraps 80000000h -> 7FFFFFFFh the
        ; overflow flag is set and the remaining strings would be skipped.
        ; jmp short has the same length as jno.
        jmp     LStack
ExitAS: pop     edi
        pop     esi
        retn                            ; arguments stay on the stack for the caller to remove
CatStrAS endp
OPTION PROLOGUE:PROLOGUEDEF             ; turn the default prologue/epilogue back on
OPTION EPILOGUE:EPILOGUEDEF             ;
Posted on 2003-06-26 06:07:23 by roticv
   ; Size-optimised concatenation fragment (no entry label in the post).
   ; Expected stack, top first: return address, destination pointer,
   ; one or more source pointers, then a 0 terminator.
   ; All of these are popped here; esi, edi, edx and al are overwritten.
   ; NOTE(review): presumably reached via "call" so the top of stack is
   ; the return address - confirm with the caller.
   pop edx ; edx = top of stack, used as the return target at the end

pop edi ; edi = destination (write cursor)
pop esi ; esi = first source
_0:lodsb ; al = next source byte
stosb ; store it in the destination
test al,al
jne _0 ; keep going until the NUL has been copied
pop esi ; esi = next source pointer, or 0 at the end of the list
dec edi ; back up onto the NUL so the next string overwrites it
test esi,esi
jne _0 ; another string follows: copy it
jmp edx ; list exhausted: continue at the saved return address
Can someone trim a byte? I would like it to be 16 bytes, please.
Posted on 2003-06-27 09:31:00 by bitRAKE
12 bytes - strcat1

strcat is unstable unless we want to modify the code at runtime everytime it's used..., use strcat1 instead and don't forget to change the labels that were being used. *maybe a macro to facilitate would be nice* :grin:. The 3rd parameter being pushed is a constant.
; Assembler setup, includes and data for the strcat / strcat1 demo below.
.386

; Flat memory model; stdcall is the default calling convention.
.model flat, stdcall
option casemap :none ; case sensitive

; Include files and import libraries from a MASM32 installation on C:.
include c:\masm32\include\windows.inc
INCLUDE C:\masm32\include\kernel32.inc
INCLUDELIB C:\masm32\lib\kernel32.lib
INCLUDE C:\masm32\include\user32.inc
INCLUDELIB C:\masm32\lib\user32.lib

; Initialised data: the two NUL-terminated source strings.
.DATA

x DB "1", 0
y DB "2881", 0

; Uninitialised data: the 16-byte destination buffer.
.DATA?

buffer db 16 dup(?)

.code

; strcat - self-modifying concatenation routine; the driver enters it with
; "jmp", not "call".
; Expected stack, top first: destination pointer, then source pointers.
; Overwrites esi, edi and al.
; WARNING: the poster describes this routine as unstable and recommends
; strcat1 instead. It rewrites its own instruction bytes on first use.
; NOTE(review): presumably this also needs a writable code section - confirm.
strcat:
pop edi ; edi = destination (write cursor)
__cat:
pop esi ; esi = next source string
__lp1:
lodsb ; al = next source byte
stosb ; store it in the destination
test al,al
jne __lp1 ; until the NUL has been copied
dec edi ; back up onto the NUL
; Stores the bytes 0EBh, 0Ah starting one byte before this instruction,
; i.e. over "dec edi" and the first byte of this mov. 0EBh is the short-jmp
; opcode, so on the next pass that spot holds a short jump, not dec edi.
; NOTE(review): where that jump lands depends on the encoded sizes of the
; instructions that follow - verify before relying on it.
mov WORD PTR [$-1], 0AEBh
jmp __cat ; fetch the next source pointer

; strcat1 - concatenation routine driven entirely by the stack; the driver
; enters it with "jmp", not "call".
; Expected stack, top first: destination pointer, source pointer, then the
; address to continue at once that source has been copied.
; Passing __cat1 as the continuation address makes the routine pop a further
; (source, continuation) pair; passing any other label leaves the routine.
; Overwrites esi, edi, edx and al.
strcat1:
pop edi ; edi = destination (write cursor)
__cat1:
pop esi ; esi = source string
pop edx ; edx = where to go after this string
__lp3:
lodsb ; al = next source byte
stosb ; store it in the destination
test al,al
jne __lp3 ; until the NUL has been copied
dec edi ; back up onto the NUL so a following string overwrites it
jmp edx ; continue at the address supplied on the stack

; Program entry point: exercises strcat and strcat1, showing the buffer in a
; message box after each one, then exits.
start:

; Arguments are pushed in reverse order: strcat pops buffer as the
; destination, then buffer and x as sources.
; NOTE(review): nothing here initialises buffer first; this presumably
; relies on it starting out empty - confirm.
push __exit_strcat
push OFFSET x
push OFFSET buffer
push OFFSET buffer
jmp strcat
__exit_strcat:

invoke MessageBox, 0, OFFSET buffer, 0, 0

; strcat1 pops: buffer (destination), buffer (source), __cat1 (continuation),
; then via __cat1: y (source), __exit_strcat1 (continuation).
; Copying buffer onto itself leaves its contents unchanged and moves the
; write cursor to its end, so y is appended after the existing text.
push __exit_strcat1
push OFFSET y
push __cat1
push OFFSET buffer
push OFFSET buffer
jmp strcat1
__exit_strcat1:

invoke MessageBox, 0, OFFSET buffer, 0, 0
invoke ExitProcess, 0

end start
is this cheating??? :grin:
Posted on 2003-06-27 15:16:09 by arkane
Small strcat:

invoke lstrcat, string1, string2


;)
Posted on 2003-06-27 20:03:03 by iblis
20 bytes
; strcat2 - append one string to another; the usage below enters it with "call".
; Expected stack, top first: return address, destination pointer, source pointer.
; Both arguments are popped here. Overwrites esi, edi, edx and al.
; The destination must already hold a NUL-terminated string, because the
; first loop walks it to find its end.
strcat2:

pop edx ; edx = return address
pop edi ; edi = destination
mov esi, edi ; read from the destination itself
__catfs:
lodsb ; copying the destination onto itself changes nothing
stosb ; but advances edi to just past its NUL
test al,al
jne __catfs
dec edi ; back up onto the destination's NUL
pop esi ; esi = source string
__catss:
lodsb ; al = next source byte
stosb ; append it
test al,al
jne __catss ; until the NUL has been copied
jmp edx ; return to the caller


...

; Usage of strcat2: push the source, then the destination, then call.
; strcat2 pops both arguments itself. buffer must already hold a
; NUL-terminated string before the first call.
push OFFSET y ; source
push OFFSET buffer ; destination
call strcat2 ; append y to buffer

push OFFSET x ; source
push OFFSET buffer ; destination
call strcat2 ; append x to buffer
same usage as lstrcat... :grin:
Posted on 2003-06-27 21:11:17 by arkane
I wasn't reading the thread, so I missed on the idea of having arbitrary parameters - 15 bytes
; strcat3 - copy any number of strings back to back into a destination;
; the usage below enters it with "jmp", not "call".
; Expected stack, top first: destination pointer, one or more source
; pointers, a 0 terminator, then the address to return to.
; Everything is popped here. Overwrites esi, edi and al.
strcat3:

pop edi ; edi = destination (write cursor)
pop esi ; esi = first source
_0:lodsb ; al = next source byte
stosb ; store it in the destination
test al,al
jne _0 ; until the NUL has been copied
pop esi ; esi = next source pointer, or 0 at the end of the list
dec edi ; back up onto the NUL so the next string overwrites it
test esi,esi
jne _0 ; another string follows: copy it
ret ; pops the return address that sits below the 0 terminator

; Usage of strcat3: push in reverse order the return label, the 0 terminator,
; the sources (last one first) and the destination, then jump in.
; strcat3 copies y and then x into buffer and returns to __exit_strcat3.
start:

push __exit_strcat3 ; address strcat3's ret will return to
push 0 ; end-of-list marker
push OFFSET x ; second source
push OFFSET y ; first source
push OFFSET buffer ; destination
jmp strcat3
__exit_strcat3:
modified version of bitrake's algo
Posted on 2003-06-27 21:29:10 by arkane