I'm putting all the tested string related code over here.
To start with the famous string length function from Agner Fog:
String Length
To start with the famous string length function from Agner Fog:
String Length
;+--------------------------------------------------------
;
; _cstrlen
;
; Description:
; Determines the length of a null-terminated
; string.
;
; Prototype:
; 1. C:
; extern "C"
; DWORD __stdcall
; _cstrlen(
; char *lpszString // input string
; );
;
; Params:
; lpszString
; pointer to string.
;
; Returns:
; DWORD value in EAX.
;
; Revision History:
; 01-11-2000: Slightly modified by Jeremy
; Collake (lowercase code)
; to force dword alignment.
;
; Notes:
; The proc is exported with name mangling
; enabled, and another alias for it has been
; created for it in the .def file like this:
; "_cstrlen=__cstrlen@4"
; Both call the same function.
;
;--------------------------------------------------------------------
align 4
_cstrlen PROC export uses ebx edx lpszString:DWORD
MOV EAX,lpszString ; get pointer to string
LEA EDX,[EAX+3] ; pointer+3 used in the end
mov ebx,[eax]
add eax,4
lea ecx,[ebx-01010101h]
not ebx
and ecx,ebx
and ecx,80808080h
jnz j1
shr eax,2
shl eax,2
L1: MOV EBX,[EAX] ; read first 4 bytes
ADD EAX,4 ; increment pointer
LEA ECX,[EBX-01010101H] ; subtract 1 from each byte
NOT EBX ; invert all bytes
AND ECX,EBX ; and these two
AND ECX,80808080H ; test all sign bits
JZ L1 ; no zero bytes, continue loop
j1:
TEST ECX,00008080H ; test first two bytes
JNZ SHORT L2
SHR ECX,16 ; not in the first 2 bytes
ADD EAX,2
L2: SHL CL,1 ; use carry flag to avoid branch
SBB EAX,EDX ; compute length
RET
_cstrlen ENDP
Art Sands,
I am submitting this version of the string length subroutine. I tested it and it appears to work OK. Ratch
I am submitting this version of the string length subroutine. I tested it and it appears to work OK. Ratch
;-------------------------------------------------------------------------------
;*******************************************************************************
; STRLEN: Calculates the length of a ASCII string terminated by a zero byte.*
; *
; Called by: PUSH <address of string> *
; CALL STRLEN *
; *
; Returns: EAX=String Length--Length does NOT include zero byte terminator. *
; *
; Notes: This subroutine conforms with WIN32 conventions regarding *
; register preservation and stack balancing. *
; *
; Coder: Ratch *
;*******************************************************************************
STRLEN: ;it all begins here
MOV EAX,[ESP+DWORD] ;address of string
.REPEAT ;searching string ....
MOV EDX,[EAX] ;next 4 byte gulp (DWORD)
ADD EAX,DWORD ;EAX=character pointer
LEA ECX,[EDX-01010101H] ;propagate if byte is zero
NOT EDX ;set up test pattern
AND EDX,ECX ;leftmost bit of zero byte should now be set
AND EDX,080808080H ;sieve out zero bytes
.UNTIL !ZERO? ;check the next DWORD
;we now know the current DWORD contains a zero byte
SUB EAX,DWORD+BYTE ;compensate for entry additions into loops
.REPEAT ;searching current DWORD ...
INC EAX ;increase word count if no carry from shift
ROR EDX,8 ;bring leftmost bit of each byte into carry flag
.UNTIL CARRY? ;if carry flag set, byte is zero
SUB EAX,[ESP+DWORD] ;subtract beginning of string to get length
RET DWORD ;all done, and string length in EAX
;return to sender and balance the stack
;-------------------------------------------------------------------------------
page ,132
title strlen - return the length of a null-terminated string
;***
;strlen.asm - contains strlen() routine
;
; Copyright (c) Microsoft Corporation. All rights reserved.
;
;Purpose:
; strlen returns the length of a null-terminated string,
; not including the null byte itself.
;
;*******************************************************************************
.xlist
include cruntime.inc
.list
page
;***
;strlen - return the length of a null-terminated string
;
;Purpose:
; Finds the length in bytes of the given string, not including
; the final null character.
;
; Algorithm:
; int strlen (const char * str)
; {
; int length = 0;
;
; while( *str++ )
; ++length;
;
; return( length );
; }
;
;Entry:
; const char * str - string whose length is to be computed
;
;Exit:
; EAX = length of the string "str", exclusive of the final null byte
;
;Uses:
; EAX, ECX, EDX
;
;Exceptions:
;
;*******************************************************************************
CODESEG
public strlen
strlen proc
.FPO ( 0, 1, 0, 0, 0, 0 )
string equ [esp + 4]
mov ecx,string ; ecx -> string
test ecx,3 ; test if string is aligned on 32 bits
je short main_loop
str_misaligned:
; simple byte loop until string is aligned
mov al,byte ptr [ecx]
add ecx,1
test al,al
je short byte_3
test ecx,3
jne short str_misaligned
add eax,dword ptr 0 ; 5 byte nop to align label below
align 16 ; should be redundant
main_loop:
mov eax,dword ptr [ecx] ; read 4 bytes
mov edx,7efefeffh
add edx,eax
xor eax,-1
xor eax,edx
add ecx,4
test eax,81010100h
je short main_loop
; found zero byte in the loop
mov eax,[ecx - 4]
test al,al ; is it byte 0
je short byte_0
test ah,ah ; is it byte 1
je short byte_1
test eax,00ff0000h ; is it byte 2
je short byte_2
test eax,0ff000000h ; is it byte 3
je short byte_3
jmp short main_loop ; taken if bits 24-30 are clear and bit
; 31 is set
byte_3:
lea eax,[ecx - 1]
mov ecx,string
sub eax,ecx
ret
byte_2:
lea eax,[ecx - 2]
mov ecx,string
sub eax,ecx
ret
byte_1:
lea eax,[ecx - 3]
mov ecx,string
sub eax,ecx
ret
byte_0:
lea eax,[ecx - 4]
mov ecx,string
sub eax,ecx
ret
strlen endp
end
evil__donkey,
Yikes! What qualifications do they require of their programmers? Are the APIs written that badly too? Ratch
Yikes! What qualifications do they require of their programmers? Are the APIs written that badly too? Ratch
well it's from the c runtime, and you know Microsoft. they write clean but shit code. btw, can you explain why they are aligning the string here.
can you explain why they are aligning the string here.
From Agner Fog's manual: "On P1 and PMMX, misaligned data will take at least 3 clock cycles extra to access if a 4-byte boundary is crossed." Aligning the string (if required) should speed up their main_loop on older processors.
Frank,
OK, I added string alignment to the program I submitted earlier. The code to do this can be switched on or off depending on the need. Also the two small loops can be rolled or unrolled by a switch for speed or space. The choice is yours. Ratch
OK, I added string alignment to the program I submitted earlier. The code to do this can be switched on or off depending on the need. Also the two small loops can be rolled or unrolled by a switch for speed or space. The choice is yours. Ratch
Anyway, the strlen posted by evil_donkey does look like the one posted by lingo whom claims that piece of code is the fastest strlen. http://www.asmcommunity.net/board/index.php?topic=8330
Humm, is it legal to re-port the MS CRT sourcecode?
ms provides the source code for free with vs .net, and so long as you include the copyright, i think it should be ok.
Here is a subroutine to find the first instance of multiple characters in a string with only one string scan.
INVOKIT MFNDCHR,@ STRADR,'a','A','e','E','i','I','o','O','u','U',0 ;find first instance of a vowel in a string
INVOKIT MFNDCHR,@ STRADR,'.','!','?',0 ;parse out a sentence
Note the caution about always PUSHing a zero onto the stack first!
Notice the nonstandard coding to return from the subroutine. Ratch
INVOKIT MFNDCHR,@ STRADR,'a','A','e','E','i','I','o','O','u','U',0 ;find first instance of a vowel in a string
INVOKIT MFNDCHR,@ STRADR,'.','!','?',0 ;parse out a sentence
Note the caution about always PUSHing a zero onto the stack first!
Notice the nonstandard coding to return from the subroutine. Ratch
;-------------------------------------------------------------------------------
;*******************************************************************************
; MFNDCHR: Returns the address of the first character found within a string *
; from a list of characters PUSHed onto the stack. *
; *
; Called by: INVOKIT MFNDCHR,@ STRADR,<char>,<char>,<char>,<char>,...,0 *
; *
; Example: INVOKIT MFNDCHR,@ STRADR,'a','A','e','E','i','I','o','O','u','U',0*
; *
; Returns: EAX=address, NOT the index of the character within the string. To *
; get the index, subtract the string address from EAX. *
; *
; Notes: A parameter of 0 MUST be PUSHed onto the stack FIRST! *
; This marks the end of the stack and also searches for the string *
; terminator. Failure to do this will result in performance error. *
; *
; This subroutine conforms with WIN32 conventions regarding *
; register preservation and stack balancing. *
; *
; Coder: Ratch *
;*******************************************************************************
MFNDCHR$1 STRUCT
DWORD 4 DUP (?) ;register save area
RETURN DWORD ?
STRADR DWORD ?
FIRSTP DWORD ?
; .... DWORD ? ;any number of parameter chars can be pushed onto stack
MFNDCHR$1 ENDS
S$1 EQU ESP.MFNDCHR$1 ;save some typing
.CODE
MFNDCHR: ;it all begins here
RPUSHIT EBX,EBP,ESI,EDI ;save those registers
MOV EBP,01010101H ;replication constant
MOV EDI,[S$1.STRADR] ;address of string
.REPEAT ;searching string ....
LEA ESI,[S$1.FIRSTP] ;pointer to char list
XOR EBX,EBX ;clear accumulation mask
.REPEAT ;searching DWORD for chars from list
MOV EAX,[ESI] ;next char to check
MUL EBP ;replication operation
MOV ECX,EBP ;@@@ prepare for propagation operation
MOV EDX,[EDI] ;next DWORD
NEG ECX ;@@@ prepare for propagation operation
XOR EDX,EAX ;add to accumulation mask
ADD ECX,EDX ;@@@ propagation operation
; LEA ECX,[EDX-01010101H] ;this instruction replaces the 3 above marked with @@@
NOT EDX ;prepare for propagation operation
AND EDX,ECX ;propagation operation
ADD ESI,DWORD ;increment pointer for chars to check
OR EBX,EDX ;update accumulation mask
TEST EAX,EAX ;check if done with char list
.UNTIL ZERO?
ADD EDI,DWORD ;increment pointer for DWORD of string
AND EBX,80808080H ;sieve out zero bytes from accumulation mask
.UNTIL !ZERO? ;check for successful search
.REPEAT ;searching current DWORD ...
INC EDI ;increase char count if no carry from shift
ROR EBX,8 ;bring leftmost bit of each byte into carry flag
.UNTIL CARRY? ;if carry flag set, at least one byte is zero
LEA EDX,[S$1.STRADR] ;prepare to compute number of params pushed
LEA EAX,[EDI-DWORD-BYTE] ;adjust found char address
SUB EDX,ESI ;EDX=negative value of number of params pushed
POPIT EBX,EBP,ESI,EDI ;restore those registers
POP ECX ;ECX=return address
SUB ESP,EDX ;balance stack
JMP ECX ;return to sender
;-------------------------------------------------------------------------------
here is a nice small strlen: http://modseven.de/pastebin.php?id=25