I just finished a number to string conversion proc as a learning exercise.  My idea was to divide the number by 1 in the largest possible place value and then continue to divide reiteratively, dividing the divisor by 10 each time.  After each division I take the quotient and add 48 (the ascii table offset) to it to get that number's corresponding ascii character.  Then at the end taking the last number as is.

So that's my solution and it works like a charm.  My question now is:  is that the way it's usually done, or is there a less costly algorithm?  There used to be an algorithms section on this board but I couldn't find it.  Cheers!

Here's the code:

``;+----------------------------------------------------------------------------+;|  Title: NumberToString;|  In: dwNumberIn (Contains the dword value to be converted.);|      dwBufSizeIn (Contains the size, in bytes, of the buffer pointed to.);|  Out: pStringOut (Contains a pointer to the buffer receiving the string.);|  Returns: -2 on buffer to small, -1 on error, 0 if successful;|  Notes: Maximum DWORD value is 4,294,967,295 (0xFFFFFFFF);|  Author: R.Daneel      ;+----------------------------------------------------------------------------+NumberToString   proc    dwNumberIn:DWORD, pStringOut:DWORD, dwBufSizeIn:DWORD    LOCAL   dwRemainder:DWORD    LOCAL   dwQuotient:DWORD    LOCAL   dwNumber:DWORD    LOCAL   pCharPosition:DWORD    LOCAL   dwDivCount:DWORD    LOCAL   dwDivisor:DWORD    LOCAL   boolZeroFlag:DWORD    LOCAL   dwDigitCount:DWORD    ;## Set the zero keeper flag to FALSE    mov     boolZeroFlag, FALSE        ;## Set the initial digit count to zero    mov     dwDigitCount, 0    ;## Save the original number    push    dwNumberIn    pop     dwNumber        ;## Set the string buffer tracking variable    push    pStringOut    pop     pCharPosition        ;## Set the initial divisor to 1 billion and the div count to 9    mov     dwDivisor, 1000000000         mov     dwDivCount, 9       ;## Clear the output buffer with zeros    pushf    cld    mov     al, 0    mov     ecx, dwBufSizeIn    mov     edi, pStringOut    rep     stosb    popf          ;## Enter the loop to begin dividing down the number by 10's.  The idea is to    ;## divide the number again and again by descending 10's position and use the    ;## quotient as the basis for what number goes at that position.  Then we add    ;## 48 to that number to get to the corresponding ascii character.    .WHILE (dwDivCount > 0)        ;## See if this number is divisible by the divisor        nop        xor     eax, eax        xor     edx, edx         mov     eax, dwNumber        mov     ebx, dwDivisor        div     ebx        mov     dwRemainder, edx        mov     dwQuotient, eax                .IF ((dwQuotient > 0) && (boolZeroFlag != TRUE))            ;## Since we've hit a number greater than zero, now we need to            ;## display the zeros since they'll have significance.            mov     boolZeroFlag, TRUE                    .ENDIF                .IF ((dwQuotient > 0) || (boolZeroFlag == TRUE))            ;## See if we've reached the string size limit            inc     dwDigitCount            mov     eax, dwBufSizeIn            dec     eax            .IF (dwDigitCount > eax)                ;## Since there was an error, let's clear the output buffer                 ;## with zeros so we don't give false results.                pushf                cld                mov     al, 0                mov     ecx, dwBufSizeIn                mov     edi, pStringOut                rep     stosb                popf                            mov     eax, -2                ret            .ENDIF                                ;## This number was larger than the divisor so the quotient            ;## should tell us what the first character should be.  We need            ;## to add 48 to that number to get the equivelant ascii char.            ;## We then store the character in the first byte of the string.            xor     eax, eax            mov     eax, dwQuotient            add     eax, 48            mov     edi, pCharPosition            stosb                        ;## Increment to the next byte in the string buffer.            inc     pCharPosition                                    ;## Now subtract (quotient * divisor) from the original value            ;## to begin testing the next position.            .WHILE (dwQuotient > 0)                mov     eax, dwNumber                sub     eax, dwDivisor                mov     dwNumber, eax                dec     dwQuotient            .ENDW        .ENDIF                ;## Decrement the div count and divide the divisor by 10 to get the        ;## next 10's position to test        dec     dwDivCount        xor     eax, eax        xor     edx, edx         mov     eax, dwDivisor        mov     ebx, 10        div     ebx        mov     dwDivisor, eax         .ENDW       ;## Put the final remainder in the last string position    xor     eax, eax    mov     eax, dwNumber    add     eax, 48    mov     edi, pCharPosition    stosb            xor     eax, eax    retNumberToString   endp``
Posted on 2009-03-25 16:55:59 by rdaneel
Hi,
No offence, really, but this is acutally the longest and probably the slowest "dw2ascii" function I've ever seen ;) So yeah - there are better ways to do it :P First of all: avoid any divisions. Div is generally bad. Then delete this 'Clear the output buffer with zeros' because it's kinda pointless.

Look at MASM32's implementation of "dwtoa". It should give you some nice ideas :)
Posted on 2009-03-26 09:24:41 by ti_mo_n
A way to remove one division is to 'reverse' your algorithm.
You divide out digits from one end, starting with a big divisor, and then cutting down the divisor one digit at a time...
The number can be seen as this:
number = digit(n) * 10^n + digit(n-1) * 10^(n-1) + ... + digit(1) * 10^1 + digit(0) * 10^0

Looking at it another way:
digit(0) = number % 10;
digit(1) = (number/10) % 10;
digit(2) = (number/100) % 10;
...
digit(n) = number/(10^n) % 10;

Basically you can rewrite that as a sequential algorithm:
number(n) = number(n-1) / 10;
digit(n) = number(n) % 10;

This allows you to take advantage of the fact that % 10 and / 10 are a single div operation. Then you just have a single loop... The only difference is that you start filling your buffer from the other side.
Posted on 2009-03-26 10:23:44 by Scali
ti_mo_n:  none taken.  i am a terrible asm programmer.  this was mostly a mental exercise to see if I could reproduce this function without cheating and looking at the m32lib source.  now that I know a way to do it I feel free to cheat. :)  i planned on trying to convert those divs to some form of shr+mul after I confirmed that the layout worked.  thanks for the info.

scali:  this seems to be what the m32lib is doing if I read it correctly.  now I understand why he reversed the digits in that code.  great info.  thankyou.

- in general, I hate using pre-made code without knowing at least how it accomplishes what it's doing.  once i understand the concept then it doesn't feel like cheating to use those common functions.  maybe i'm just odd.  thanks again guys.
Posted on 2009-03-26 11:06:48 by rdaneel
On a side note (and interesting tidbit) I thought once like that too but unlike you I decided to peek in the m32lib and clearly remember one of my bestest WTF moments ever...  :lol:

``; #########################################################################      .386      .model flat, stdcall  ; 32 bit memory model      option casemap :none  ; case sensitive        wsprintfA PROTO C :DWORD,:VARARG        wsprintf equ <wsprintfA>    .data      fMtStrinG db "%lu",0    .code; #########################################################################dw2a proc dwValue:DWORD, lpBuffer:DWORD    ; -------------------------------------------------------------    ; convert DWORD to ascii string    ; dwValue is passed as a value, direct, indirect or in register    ; lpBuffer is the ADDRESS of the receiving buffer    ; EXAMPLE:    ; invoke dw2a,edx,ADDR buffer    ; -------------------------------------------------------------        invoke wsprintf,lpBuffer,ADDR fMtStrinG,dwValue    retdw2a endp; #########################################################################end``
Posted on 2009-03-26 11:29:29 by JimmyClif
LOL!!!  i did the same freakin thing.  I saw that call to wsprintf and thought cheater!  then i realized i was looking at the wrong file.  :oops:  good stuff. lol.
Posted on 2009-03-26 12:43:50 by rdaneel
You could probably use muls yea.
I don't think I ever tried that... but you could use fixedpoint arithmetic...
Namely:
1: x / 10 = x * (1/10)
Which can be rewritten to (x * (2^32/10)) / (2^32) == (x * (2^32/10)) >> 32
And
2: x % 10 = x - ((int)(x/10)*10)

Substitute x/10 with the first equation, and you have completely removed the divisions from the algorithm.
The x*10 operation can also be replaced with some shifts or adds...
Eg:
y = x + x // x * 2
z = x << 3 // x * 8
x = x + y // x * (2 + 8 )

Depending on the CPU used, these elaborate substitutions might actually be faster than using a div or mul directly.
Posted on 2009-03-26 12:56:41 by Scali
I tried my hand at doing it with muls only, no divs...
It works, gets a tad inaccurate for numbers close to 2^32, but I've put in a correction factor, so it always gives the right result. If you were to optimize it, you could split the algo in two... If you find out where the instability starts, you can just use the quick-and-dirty version for all small numbers, and use the correction only for large numbers.
Posted on 2009-03-26 15:42:05 by Scali

I tried my hand at doing it with muls only, no divs...

What about only one multiplication and no loops :)
http://www.masm32.com/board/index.php?topic=9951.msg73240#msg73240
Posted on 2009-03-26 16:45:01 by drizz

I tried my hand at doing it with muls only, no divs...

What about only one multiplication and no loops :)
http://www.masm32.com/board/index.php?topic=9951.msg73240#msg73240

I'd rather hear the explanation than see the code.
Posted on 2009-03-26 16:58:44 by Scali
Sure,

maximum unsiged 32bit number is 4294967295, thats 10 digits in base 10
im using fixed point arithmetic to get decimal digit in the last 4 bits of a 64bit number

only small difference is handling the first digit, it can be max 4(10)
i'm going to multiply the number by 2(64-A)/B
where A is the number of bits of number 4
4(10) = 100(2), that is 3 bits, A=3
B = 10Int(Log10(4294967295))

2^(64-A)/B = 0x089705F41

after handling the first digit, we get subsequent digits by multiplying with 10
the digit is in the last nibble of 64bit number (last 4 bits)

``dw2ascii(DWORD val){	//val=4294967295;	QWORD x,y;	DWORD i,j;		x=(QWORD)val*0x089705F41; // mul by 2^(64-3)/1000000000	x+=0x70000000; // this is only needed for when val is very big ~~ 4294967295 	i=x>>(64-3); // last 3 bits == either 0,1,2,3 or 4	printf("%u",i);	// move the number in position and clear out the last nibble (4 bits)	x>>=1; 	x=x&0xFFFFFFFFFFFFFFF;// mask out everything but the last nibble	x*=10;// multiply by 10 to get next digit in the last nibble	i=x>>(64-4);	printf("%u",i);		x=x&0xFFFFFFFFFFFFFFF;	x*=10;	i=x>>(64-4);	printf("%u",i);	x=x&0xFFFFFFFFFFFFFFF;	x*=10;	i=x>>(64-4);	printf("%u",i);	x=x&0xFFFFFFFFFFFFFFF;	x*=10;	i=x>>(64-4);	printf("%u",i);	x=x&0xFFFFFFFFFFFFFFF;	x*=10;	i=x>>(64-4);	printf("%u",i);	x=x&0xFFFFFFFFFFFFFFF;	x*=10;	i=x>>(64-4);	printf("%u",i);	x=x&0xFFFFFFFFFFFFFFF;	x*=10;	i=x>>(64-4);	printf("%u",i);	x=x&0xFFFFFFFFFFFFFFF;	x*=10;	i=x>>(64-4);	printf("%u",i);	x=x&0xFFFFFFFFFFFFFFF;	x*=10;	i=x>>(64-4);	printf("%u",i);	}``

Oh, and full 64bits are only needed for first 2 digits, afterwards lower dword can be ignored
Posted on 2009-03-26 18:26:47 by drizz
Yea, thanks.
I half figured out your code... I saw that you were multiplying by 10 all the time, extracting one digit from the most significant part of the register.
That made me think that you used a constant to basically scale 1000000000 up to 0x10000000 (so most significant decimal digit becomes most significant hexadecimal digit.. in a way... and that means the top 4 bits).

I didn't really have the time yet to fully analyse your constant, but I did see there was some kind of special case. And right, good observation that the top digit can only be 0-4, so you need less bits than for the rest.

One thing though... Why exactly do you do this:
x+=0x70000000; // this is only needed for when val is very big ~~ 4294967295

I suppose 0x700000000 is the equivalent of 0.5 in the upscaled number? So basically you use it to round to nearest (as a proper div would do), rather than rounding down?

At any rate, it's a very elegant routine... If it's completely robust for all numbers between 0 and 2^32, that'd be great.
I bet if I write out all my multiplications into a formula, this relation will somehow follow from it. I used the 'reverse' method like with divisions, starting out with the lowest digit first. But for multiply, using the highest digit first makes a lot more sense.
Posted on 2009-03-27 02:50:46 by Scali
DIV isn't too bad on newer cpu's but using SSEx (if you can) is the way to go.
Posted on 2009-03-27 02:59:59 by sinsi

Yea, thanks.
I half figured out your code... I saw that you were multiplying by 10 all the time, extracting one digit from the most significant part of the register.
That made me think that you used a constant to basically scale 1000000000 up to 0x10000000 (so most significant decimal digit becomes most significant hexadecimal digit.. in a way... and that means the top 4 bits).

I didn't really have the time yet to fully analyse your constant, but I did see there was some kind of special case. And right, good observation that the top digit can only be 0-4, so you need less bits than for the rest.

One thing though... Why exactly do you do this:
x+=0x70000000; // this is only needed for when val is very big ~~ 4294967295

I suppose 0x700000000 is the equivalent of 0.5 in the upscaled number? So basically you use it to round to nearest (as a proper div would do), rather than rounding down?

At any rate, it's a very elegant routine... If it's completely robust for all numbers between 0 and 2^32, that'd be great.
I bet if I write out all my multiplications into a formula, this relation will somehow follow from it. I used the 'reverse' method like with divisions, starting out with the lowest digit first. But for multiply, using the highest digit first makes a lot more sense.

You are correct in your observations, and the function works for all numbers in [0 , 232-1] interval.
0x70000000 value is empiricaly chosen and yes its for rounding, sorry for the misleading comment

``dw2ascii(DWORD val,BYTE *pbuff){	//DWORD val=4294967295;	QWORD x;	DWORD i,j;		x=(QWORD)val*0x089705F41; // mul by 2^(64-3)/1000000000	x+=0x70000000;	i=x>>(64-3); // last 3 bits == either 0,1,2,3 or 4		pbuff=i+0x30;		x>>=1; 	// clear out the last nibble (4 bits)	x=x&0xFFFFFFFFFFFFFFF;// mask out everything but the last nibble	x*=10;// multiply by 10 to get next digit in the last nibble	i=x>>(64-4);		pbuff=i+0x30;		j=(x>>32)&0xFFFFFFF;	j*=10;	i=j>>(32-4);	pbuff=i+0x30;	j&=0xFFFFFFF;	j*=10;	i=j>>(32-4);	pbuff=i+0x30;	j&=0xFFFFFFF;	j*=10;	i=j>>(32-4);	pbuff=i+0x30;	j&=0xFFFFFFF;	j*=10;	i=j>>(32-4);	pbuff=i+0x30;	j&=0xFFFFFFF;	j*=10;	i=j>>(32-4);	pbuff=i+0x30;	j&=0xFFFFFFF;	j*=10;	i=j>>(32-4);	pbuff=i+0x30;	j&=0xFFFFFFF;	j*=10;	i=j>>(32-4);	pbuff=i+0x30;	j&=0xFFFFFFF;	j*=10;	i=j>>(32-4);	pbuff=i+0x30;		pbuff=0;}``

Posted on 2009-03-27 07:37:57 by drizz
Thanks for the explanation.
It was fun trying to dissect your code, and making my own version. I'll try to complete my version tonight, now that the last few details have been worked out (hopefully)... It sorta worked, but wasn't accurate enough for all digits.

I think it's fun and very useful to solve problems without using the 'obvious' solution, based on simple maths.
In the past, I used a Commodore 64, which didn't even have instructions for mul and div at all. You could only add, sub and shift (one bit at a time).
Since I had to write my own mul and div routines, it gave me a nice insight in how various operations relate, and how analysing bit-patterns can be useful in skipping certain operations, or rewriting things for more efficient instructions.
Posted on 2009-03-27 09:29:49 by Scali
You don't have to tell me that assembly is fun  :P

For the brave ones that want to apply this tehnique for 64bit number-to-string
2^127/10^19 = 17014118346046923173 (0EC1E4A7DB69561A5H) is a nice constant

here is a quick test that shows it works for 64bits too (watch the leftmost nibble)
FFFFFFFFFFFFFFFF(16) = 18446744073709551615(10)
EC1E4A7DB69561A5 * FFFFFFFFFFFFFFFF
= EC1E4A7DB69561A413E1B582496A9E5B
------------------------------------
= EC1E4A7DB69561A413E1B582496A9E5B .
= 1D83C94FB6D2AC34827C36B0492D53CB .
= 8725DD1D243ABA0D18DA22E2DBC545EE
= 477AA3236A4B448C
= 6BE7B9D58566C6B0
= 770D42573603C2E0
= 468497681C259CC0
= 412DEA1119781F80
= 0BCB24AAFEB13B00
= 3B5A52CB7D3B0C00
= 71873BF2E44E7800
= 0F48577CEB10B000
= 98D36AE12EA6E000
= 58422CCBD284C000
= 5295BFF6392F8000
= 19D97F9E3BDB0000
= 627EFC2E568E0000
= 18F5D9CF618C0000
= 599A8219CF780000

again for first 2 digits we need 128bits and afterwards only 64.
Posted on 2009-03-27 10:36:41 by drizz
ok, here is a test version of 64bitNumberToString, needs further testing and optimizing.

``__declspec(naked) __stdcall _U64ToStr (QWORD val, BYTE *pbuff){	__asm	{		#define __locals 4*4+4*4//4*4=regs + 4*4=128bit temp		#define __saveregs dword ptr 		#define __128bittmp dword ptr 		#define __val_hi dword ptr 		#define __val_lo dword ptr 		#define __pbuff dword ptr 		#define __cnst_hi 0xEC1E4A7D		#define __cnst_lo 0xB69561A5		#define __correction_lo 0xE30437FB		#define __correction_hi 0xAD7B4F1B		sub esp,__locals		mov __saveregs[0*4],ebp		mov __saveregs[1*4],esi		mov __saveregs[2*4],edi		mov __saveregs[3*4],ebx		//		// 64bit x 64bit = 128bit result		// -------------------------------		mov esi,__val_lo//__x_lo		mov edi,__val_hi//__x_hi		mov eax,__cnst_lo//__y_lo; = b0		mul esi;// get a0*b0 = d1:d0		//mov ebp,__r_lo		mov __128bittmp[0*4],eax;//d0		mov ecx,edx;//d1		mov eax,__cnst_lo//__y_lo; = b0		xor ebx,ebx		mul edi;// get a1*b0 = e1:e0		add ecx,eax;//e0		adc ebx,edx;//e1		mov eax,__cnst_hi//__y_hi; =b1		mul esi;// get a0*b1 = f1:f0		add ecx,eax;//f0		adc ebx,edx;//f1		mov __128bittmp[1*4],ecx		mov ecx,0		mov eax,__cnst_hi//__y_hi; =b1		adc ecx,ecx		//mov ebp,__r_hi		mul edi;// get a1*b1 = g1:g0		add eax,ebx;//g0		adc edx,ecx;//g1		mov __128bittmp[2*4],eax		mov __128bittmp[3*4],edx		// -------------------------------		xor eax,eax		add __128bittmp[0*4],__correction_lo		adc __128bittmp[1*4],__correction_hi		adc __128bittmp[2*4],eax		adc __128bittmp[3*4],eax		// -------------------------------		mov eax,'0'		mov edx,__128bittmp[3*4]		add edx,edx//high dword of high qword		adc eax,0// first digit 0 or 1		mov ebp,__pbuff		mov ,al 		shr edx,1 		//mov __128bittmp[3*4],edx 		//		mov esi,__128bittmp[0*4]		mov edi,__128bittmp[1*4]		mov eax,__128bittmp[2*4]		//mov edx,__128bittmp[3*4]				// move into position		shrd esi,edi,3		shrd edi,eax,3		shrd eax,edx,3		shr edx,3				// mul by 10				mov __128bittmp[0*4],esi		mov __128bittmp[1*4],edi		mov __128bittmp[2*4],eax		mov __128bittmp[3*4],edx		shld edx,eax,2		shld eax,edi,2		shld edi,esi,2		shl esi,2		add esi,__128bittmp[0*4]		adc edi,__128bittmp[1*4]		adc eax,__128bittmp[2*4]		adc edx,__128bittmp[3*4]		add esi,esi		adc edi,edi		adc eax,eax		adc edx,edx				mov ebx,edx		and edx,0x0FFFFFFF		shr ebx,32-4		add ebx,'0'		mov byte ptr ,bl				// mul by 10				mov __128bittmp[0*4],esi		mov __128bittmp[1*4],edi		mov __128bittmp[2*4],eax		mov __128bittmp[3*4],edx		shld edx,eax,2		shld eax,edi,2		shld edi,esi,2		shl esi,2		add esi,__128bittmp[0*4]		adc edi,__128bittmp[1*4]		adc eax,__128bittmp[2*4]		adc edx,__128bittmp[3*4]		add esi,esi		adc edi,edi		adc eax,eax		adc edx,edx				mov ebx,edx		and edx,0x0FFFFFFF		shr ebx,32-4		add ebx,'0'		mov byte ptr ,bl				#define shouldbeunrolled 17		mov ecx,-shouldbeunrolled	_L0:		mov esi,eax;		mov edi,edx;		shld edx,eax,2;		shl eax,2;		add eax,esi;		adc edx,edi;		add eax,eax;		adc edx,edx;		mov ebx,edx;		and edx,0x0FFFFFFF;		shr ebx,32-4;		add ebx,'0';		mov byte ptr ,bl;		inc ecx		jnz _L0				mov byte ptr ,0		// -------------------------------		mov ebp,__saveregs[0*4]		mov esi,__saveregs[1*4]		mov esi,__saveregs[2*4]		mov ebx,__saveregs[3*4]		add esp,__locals		ret 8+4	}}``
Posted on 2009-03-29 11:54:30 by drizz