Hi to all!
How can I convert text written in Unicode (UTF-8, UCS-2 little endian, or UCS-2 big endian) to ANSI?
Thanks,
Mike
How can I convert text written in Unicode (UTF-8, UCS-2 little endian, or UCS-2 big endian) to ANSI?
Thanks,
Mike
Mike,
Have a look at the specs for the API WideCharToMultiByte().
Have a look at the specs for the API WideCharToMultiByte().
;==============================================================================
; convert.asm - description
; -------------------
; begin :Sat May 21 ,2005
; copyright :Webring (C) 2005
; email :Webring@email.com
; website :http://www.32bit.tk
; about :Ansi2Unicode,Unicode2Ansi
;
;
;==============================================================================
.386
.model flat, stdcall
option casemap:none ; Case sensitive
include \Masm32\include\windows.inc
include \Masm32\include\kernel32.inc
include \Masm32\include\user32.inc
includelib \Masm32\lib\user32.lib
includelib \Masm32\lib\kernel32.lib
Ansi2Unicode proto :DWORD,:DWORD
Unicode2Ansi proto :DWORD,:DWORD
.data
str1 db "mystring",0
ascii db "ascii",0
.data?
obuf db 1024 dup(?)
obuf2 db 1024 dup(?)
.code
start:
    ; Round-trip demo: str1 is converted to wide characters in obuf, shown with
    ; the wide-character message box, converted back into obuf2 and shown again.
    invoke  Ansi2Unicode, offset str1, offset obuf
    invoke  MessageBoxW, NULL, offset obuf, offset obuf, MB_OK     ; text and caption are both the wide string
    invoke  Unicode2Ansi, offset obuf, offset obuf2
    invoke  MessageBox, NULL, offset obuf2, offset ascii, MB_OK
    invoke  ExitProcess, 0
;------------------------------------------------------------------------------
; Ansi2Unicode - convert a zero-terminated ANSI (CP_ACP) string to wide chars.
; In:   iString = address of the zero-terminated source string
;       ouptbuf = address of the destination buffer; its capacity is not passed
;                 in, so the caller must provide room for the whole result
; Out:  eax = number of wide characters written, terminator included,
;             or 0 if the conversion failed
;------------------------------------------------------------------------------
Ansi2Unicode proc iString,ouptbuf
    ; With a source length of -1 the terminator is converted too, so the
    ; destination size must count it. lstrlen does not count the terminator,
    ; which made the destination one character too small; ask the API for
    ; the required size instead (destination size 0 = size query).
    invoke  MultiByteToWideChar, CP_ACP, 0, iString, -1, NULL, 0
    .if eax != 0
        ; eax = required size in wide characters, terminator included
        invoke  MultiByteToWideChar, CP_ACP, 0, iString, -1, ouptbuf, eax
    .endif
    ret
Ansi2Unicode endp
;------------------------------------------------------------------------------
; Unicode2Ansi - convert a zero-terminated wide string to ANSI (CP_ACP).
; In:   iString = address of the zero-terminated wide-character source string
;       ouptbuf = address of the destination buffer; its capacity is not passed
;                 in, so the caller must provide room for the whole result
; Out:  eax = number of bytes written, terminator included,
;             or 0 if the conversion failed
;------------------------------------------------------------------------------
Unicode2Ansi proc iString,ouptbuf
    ; The destination size used to come from lstrlen on the (not yet written)
    ; output buffer, which is only 0 when that buffer happens to start with a
    ; zero byte. Request the size query explicitly: destination size 0 makes
    ; the API return the number of bytes needed without writing anything.
    invoke  WideCharToMultiByte, CP_ACP, 0, iString, -1, NULL, 0, NULL, NULL
    .if eax != 0
        ; eax = required size in bytes, terminator included
        invoke  WideCharToMultiByte, CP_ACP, 0, iString, -1, ouptbuf, eax, NULL, NULL
    .endif
    ret
Unicode2Ansi endp
end start
; convert.asm - description
; -------------------
; begin :Sat May 21 ,2005
; copyright :Webring (C) 2005
; email :Webring@email.com
; website :http://www.32bit.tk
; about :Ansi2Unicode,Unicode2Ansi
;
;
;==============================================================================
.386
.model flat, stdcall
option casemap:none ; Case sensitive
include \Masm32\include\windows.inc
include \Masm32\include\kernel32.inc
include \Masm32\include\user32.inc
includelib \Masm32\lib\user32.lib
includelib \Masm32\lib\kernel32.lib
Ansi2Unicode proto :DWORD,:DWORD
Unicode2Ansi proto :DWORD,:DWORD
.data
str1 db "mystring",0
ascii db "ascii",0
.data?
obuf db 1024 dup(?)
obuf2 db 1024 dup(?)
.code
start:
    ; Round-trip demo: str1 is converted to wide characters in obuf, shown with
    ; the wide-character message box, converted back into obuf2 and shown again.
    invoke  Ansi2Unicode, offset str1, offset obuf
    invoke  MessageBoxW, NULL, offset obuf, offset obuf, MB_OK     ; text and caption are both the wide string
    invoke  Unicode2Ansi, offset obuf, offset obuf2
    invoke  MessageBox, NULL, offset obuf2, offset ascii, MB_OK
    invoke  ExitProcess, 0
;------------------------------------------------------------------------------
; Ansi2Unicode - convert a zero-terminated ANSI (CP_ACP) string to wide chars.
; In:   iString = address of the zero-terminated source string
;       ouptbuf = address of the destination buffer; its capacity is not passed
;                 in, so the caller must provide room for the whole result
; Out:  eax = number of wide characters written, terminator included,
;             or 0 if the conversion failed
;------------------------------------------------------------------------------
Ansi2Unicode proc iString,ouptbuf
    ; With a source length of -1 the terminator is converted too, so the
    ; destination size must count it. lstrlen does not count the terminator,
    ; which made the destination one character too small; ask the API for
    ; the required size instead (destination size 0 = size query).
    invoke  MultiByteToWideChar, CP_ACP, 0, iString, -1, NULL, 0
    .if eax != 0
        ; eax = required size in wide characters, terminator included
        invoke  MultiByteToWideChar, CP_ACP, 0, iString, -1, ouptbuf, eax
    .endif
    ret
Ansi2Unicode endp
;------------------------------------------------------------------------------
; Unicode2Ansi - convert a zero-terminated wide string to ANSI (CP_ACP).
; In:   iString = address of the zero-terminated wide-character source string
;       ouptbuf = address of the destination buffer; its capacity is not passed
;                 in, so the caller must provide room for the whole result
; Out:  eax = number of bytes written, terminator included,
;             or 0 if the conversion failed
;------------------------------------------------------------------------------
Unicode2Ansi proc iString,ouptbuf
    ; The destination size used to come from lstrlen on the (not yet written)
    ; output buffer, which is only 0 when that buffer happens to start with a
    ; zero byte. Request the size query explicitly: destination size 0 makes
    ; the API return the number of bytes needed without writing anything.
    invoke  WideCharToMultiByte, CP_ACP, 0, iString, -1, NULL, 0, NULL, NULL
    .if eax != 0
        ; eax = required size in bytes, terminator included
        invoke  WideCharToMultiByte, CP_ACP, 0, iString, -1, ouptbuf, eax, NULL, NULL
    .endif
    ret
Unicode2Ansi endp
end start
Well, it took me a while to find this on my HDD, and there are two replies already :)
(This code kind of works for almost all English strings :)
I wrote it while I was fiddling with asm. Oops, it uses lodsw and stosb, and the gurus will complain that they are slow :) (something like 5 cycles for one stos), but it may still be helpful for understanding the basics.
(This code kind of works for almost all English strings :)
I wrote it while I was fiddling with asm. Oops, it uses lodsw and stosb, and the gurus will complain that they are slow :) (something like 5 cycles for one stos), but it may still be helpful for understanding the basics.
.386
.model flat,stdcall
option casemap:none
include \masm32\include\windows.inc
include \masm32\include\kernel32.inc
includelib \masm32\lib\kernel32.lib
include \masm32\include\user32.inc
includelib \masm32\lib\user32.lib
lstrlenW proto :dword ; needed coz lstrlenW isnt there in any .inc files
.data
unicode_string db "t",0,"h",0,"i",0,"s",0,32,0,"i",0,"s",0,32,0,"m",0,"y",0,32,0,"u",0,"n"
db 0,"i",0,"c",0,"o",0,"d",0,"e",0,32,0,"s",0,"t",0,"r",0,"i",0,"n",0,"g",0,32,0,"i"
db 0,32,0,"w",0,"a",0,"n",0,"t",0,32,0,"t",0,"o",0,32,0,"c",0,"o"
db 0,"n",0,"v",0,"e",0,"r",0,"t",0,0,0
caption db "unicode_convertor",0
.DATA?
ansistring db 100 dup (?)
.CODE
start:
    ; Narrow the zero-terminated 16-bit string at unicode_string into
    ; ansistring by keeping only the low byte of every 16-bit unit, then
    ; display the result. Only the low byte is stored, so any unit with a
    ; non-zero high byte loses information.
    invoke  lstrlenW, offset unicode_string     ; eax = length in characters
    lea     edx, [eax + 1]                      ; units to copy: the characters plus the terminator
    mov     esi, offset unicode_string          ; esi -> next 16-bit source unit
    mov     edi, offset ansistring              ; edi -> next destination byte
    xor     eax, eax
    .repeat
        lodsw                                   ; ax = source unit, esi += 2
        stosb                                   ; store al, edi += 1
        dec     edx
    .until edx == 0
    invoke  MessageBox, NULL, offset ansistring, offset caption, MB_OK
    invoke  ExitProcess, NULL
end start
bluffer, great code, man! I changed it a bit so that it's faster, and I also included a nice Unicode string macro written by elicz so that you can write your Unicode string on one line (e.g. TEXTW unicode_string, <this is my unicode string i want to convert/0>) instead of all that junk.
Thanks to all! I'll try.
Hi!
I tried all the methods. They all work with only one format, UCS-2 little endian; these files have an FFFE header and the format SSCC for each symbol, where SS = symbol and CC = charset. Files written in UCS-2 big endian have an FEFF header and the format CCSS for each symbol. They can be rewritten with this code:
push ebx                            ; preserve ebx; it is used as the byte counter below
; Swap the two bytes of every 16-bit unit in the buffer at ahMem, in place,
; for aFileLen bytes.
; NOTE(review): the loop advances two bytes at a time, so aFileLen is
; presumably even; with an odd length the last iteration would touch one
; byte past the end - confirm against the caller.
mov esi, ahMem                      ; esi -> current 16-bit unit
xor ebx, ebx                        ; ebx = number of bytes processed so far
.while ebx < aFileLen
    mov ax, word ptr [esi]          ; load the unit (memory operand restored; it was missing)
    xchg ah, al                     ; exchange its high and low bytes
    mov word ptr [esi], ax          ; store it back in place
    add ebx, 2
    add esi, 2
.endw
pop ebx
The third file format is UTF-8. About these files I know only that they have an EFBBBF header, but I don't know how to rewrite them to UCS-2 LE format.
Mike
I tried all the methods. They all work with only one format, UCS-2 little endian; these files have an FFFE header and the format SSCC for each symbol, where SS = symbol and CC = charset. Files written in UCS-2 big endian have an FEFF header and the format CCSS for each symbol. They can be rewritten with this code:
push ebx                            ; preserve ebx; it is used as the byte counter below
; Swap the two bytes of every 16-bit unit in the buffer at ahMem, in place,
; for aFileLen bytes.
; NOTE(review): the loop advances two bytes at a time, so aFileLen is
; presumably even; with an odd length the last iteration would touch one
; byte past the end - confirm against the caller.
mov esi, ahMem                      ; esi -> current 16-bit unit
xor ebx, ebx                        ; ebx = number of bytes processed so far
.while ebx < aFileLen
    mov ax, word ptr [esi]          ; load the unit (memory operand restored; it was missing)
    xchg ah, al                     ; exchange its high and low bytes
    mov word ptr [esi], ax          ; store it back in place
    add ebx, 2
    add esi, 2
.endw
pop ebx
The third file format is UTF-8. About these files I know only that they have an EFBBBF header, but I don't know how to rewrite them to UCS-2 LE format.
Mike