Hi there! First, sorry for my poor English. I want to ask something about disassembly. (I've read the excellent threads from The Svin. :alright: )

Here it starts:

When I get an opcode:

8B03

I know it means "mov eax, [ebx]"

Because:

8B is one of the opcodes of instruction "Mov", and "03" is the ModR/M.

03 = 00 000 011
;00(mod) - memory operand, no displacement
;000(reg) - eax
;011(r/m) - [ebx]

So, I can translate opcodes into instructions by hand. But when I write a program, I don't know how to make the program recognize and organize the opcodes. For example, when the program reads the byte "8B", how can it know whether it has to go on and fetch the next byte ("03") or not?

My idea is to create an opcode table that the program can look things up in. But with this method the table will become very big, so I don't know whether it is a good idea. :stupid:

Could anybody give me some ideas? Any help will be greatly appreciated! :alright:

Thanks!
Posted on 2003-05-29 09:39:31 by pazuluo
My idea is to create a opcode table to let the program to look-up from it. But if I use this method, it will make the table very big, so I don't know if this will be a good idea.

The idea is not new. I have seen people creating such a list. But of course the list is long.

Well you could try
-scan for prefix
-if prefix present, set some flag somewhere (remember that prefixes can stack)
-loop through the 1 byte opcodes like inc/dec/xchg eax,reg/push reg/pop reg/lodsx/ etc
-scan for the opcode 0Fh -> and then work on the opcodes beginning with 0Fh, e.g. bswap
-work on the other opcodes -> decode the direction bit and the ModR/M byte
-work on fpu
-work on MMX
-work on XMM

Something like that would do, I suppose. A lot of coding.

I have some code for scanning


; Flag bits recorded for each legacy instruction prefix byte that is seen.
; Each prefix gets its own bit so that several stacked prefixes can be
; remembered at the same time.
prefix_66h equ 02h
prefix_67h equ 04h
prefix_F0h equ 08h
prefix_F2h equ 010h
prefix_F3h equ 020h
prefix_2Eh equ 040h
prefix_36h equ 080h
prefix_3Eh equ 0100h
prefix_26h equ 0200h
prefix_64h equ 0400h
prefix_65h equ 0800h
.data
; Lookup table of 11 (prefix byte, flag bit) dword pairs, 8 bytes per entry.
prefix_scan dd 066h, prefix_66h, 067h, prefix_67h, 0F0h, prefix_F0h, 0F2h, prefix_F2h
dd 0F3h, prefix_F3h, 02Eh, prefix_2Eh, 036h, prefix_36h, 03Eh, prefix_3Eh
dd 026h, prefix_26h, 064h, prefix_64h, 065h, prefix_65h

.code
; dasm - unfinished disassembler entry point; only the prefix scan is present.
;   pCode : pointer to the code bytes to examine (loaded into esi)
;   _size : number of bytes available at pCode (loaded into edx)
; Every prefix byte found at the current position has its flag OR-ed into
; `bit`, after which the scan restarts on the following byte.
; NOTE(review): `bit` is not defined anywhere in this snippet - confirm where
; it lives and what size it has.
dasm proc uses esi edi ebx pCode:dword,_size
mov esi,pCode
mov edx,_size
mov bit,0
_begin:
; restart the table walk for the byte currently at [esi]
mov ecx,11 ; number of entries in prefix_scan
lea edi,prefix_scan
prefixloop:
mov al,[edi] ; low byte of the entry = prefix byte value
cmp byte ptr[esi],al
jnz @F
; NOTE(review): if `bit` is a memory variable this is a memory-to-memory OR,
; which x86 cannot encode - the flag may need to go through a register.
or bit,DWORD PTR[edi+4]
inc esi ; consume the prefix byte
dec edx
; NOTE(review): when edx reaches zero this falls through to the next table
; entry and keeps reading [esi] instead of stopping - confirm that the
; missing remainder of the routine handles an exhausted buffer.
jnz _begin
@@:
add edi,8 ; advance to the next (byte, flag) pair
dec ecx
jnz prefixloop
; no table entry matched: [esi] is not a prefix byte
mov ecx,4
....
dasm endp

Of course it is incomplete, as I could not be bothered to continue with it :)
Posted on 2003-05-29 10:36:29 by roticv
sadly, you won't get around creating said table.
Posted on 2003-05-29 10:44:14 by Tola
The idea is to parse the byte stream, reading and advancing through the stream as required, much like the CPU does :P
Based on state machines, of course ...

It is not a 100% reliable process, after all,

especially if you go backwards and/or are unsure of the starting point, and/or if the app uses simple anti-debugging/anti-disassembler techniques such as calculating the next instruction and/or jumping based on calculations...

Handling the above will also require executing a little of the code.
Posted on 2003-05-29 11:44:51 by BogdanOntanu
Thank you all above! (Especially roticv) :alright:

It seems that I have to use the table. It's a lot of work, but I'll try. :)

Could anybody still give me some more ideas? I would appreciate it! :)
Posted on 2003-05-29 12:05:32 by pazuluo
I've started working on it, thank you all!

Can anybody else give me some ideas?
Posted on 2003-05-29 22:07:44 by pazuluo
Here is a disassembler to study:
http://home.no.net/tkos/src/debug/
Posted on 2003-05-29 22:13:02 by bitRAKE
I did a bit of recoding and my code ended up like this:


; Flag bits accumulated in ebx while decoding, one bit per legacy prefix so
; that stacked prefixes can all be remembered.
prefix_66h equ 02h              ; operand-size override
prefix_67h equ 04h              ; address-size override
prefix_F0h equ 08h              ; lock
prefix_F2h equ 010h             ; repne/repnz
prefix_F3h equ 020h             ; rep/repe/repz
prefix_2Eh equ 040h             ; cs segment override
prefix_36h equ 080h             ; ss segment override
prefix_3Eh equ 0100h            ; ds segment override
prefix_26h equ 0200h            ; es segment override
prefix_64h equ 0400h            ; fs segment override
prefix_65h equ 0800h            ; gs segment override
.data
; Register names indexed by the 3-bit register number of the opcode.
; reg32 entries are 4 bytes apart, reg16 and reg8 entries are 3 bytes apart.
reg32 db "eax",0,"ecx",0,"edx",0,"ebx",0,"esp",0,"ebp",0,"esi",0,"edi",0
reg16 db "ax",0,"cx",0,"dx",0,"bx",0,"sp",0,"bp",0,"si",0,"di",0
reg8 db "al",0,"cl",0,"dl",0,"bl",0,"ah",0,"ch",0,"dh",0,"bh",0
;_1byteopcode1: mnemonics whose text does not depend on the operand size
_daa db "daa",0
_das db "das",0
_aaa db "aaa",0
_aas db "aas",0
_pushes db "push es",0
_popes db "pop es",0
_pushcs db "push cs",0
_pushss db "push ss",0
_popss db "pop ss",0
_pushds db "push ds",0
_popds db "pop ds",0
_wait db "wait",0
_sahf db "sahf",0
_lahf db "lahf",0
_retn db "retn",0
_leave db "leave",0
_retf db "retf",0
_int3 db "int 3",0
_into db "into",0
_salc db "salc",0
_xlat db "xlat",0
_int01 db "int01",0
_hlt db "hlt",0
_cmc db "cmc",0
_clc db "clc",0
_stc db "stc",0
_cli db "cli",0
_sti db "sti",0
_cld db "cld",0
_std db "std",0
_nop db "nop",0
;_1byteopcode2: mnemonics whose text changes under the 66h prefix
_pushad db "pushad",0
_popad db "popad",0
_pushfd db "pushfd",0
_popfd db "popfd",0
_cwde db "cwde",0
_cdq db "cdq",0
_pusha db "pusha",0
_popa db "popa",0
_pushf db "pushf",0
_popf db "popf",0
_cbw db "cbw",0
_cwd db "cwd",0
; Mnemonics that are followed by a register name. They end in a blank so the
; appended register is separated from the mnemonic ("inc eax", not "inceax").
_inc db "inc ",0
_dec db "dec ",0
_push db "push ",0
_pop db "pop ",0
_xchgeax db "xchg eax, ",0
_xchgax db "xchg ax, ",0
align 4
; (prefix byte, flag bit) dword pairs, 8 bytes per entry.
prefix_scan dd 066h, prefix_66h, 067h, prefix_67h, 0F0h, prefix_F0h, 0F2h, prefix_F2h
dd 0F3h, prefix_F3h, 02Eh, prefix_2Eh, 036h, prefix_36h, 03Eh, prefix_3Eh
dd 026h, prefix_26h, 064h, prefix_64h, 065h, prefix_65h
prefix_scan_count equ ($ - prefix_scan) / 8
;lbyteopcode_1 dd 08h, OFFSET _inc, 09h, OFFSET _dec, 0Ah, OFFSET _push, 0Bh, OFFSET _pop
; (opcode byte, pointer to mnemonic) dword pairs, 8 bytes per entry.
; The entry count is computed by the assembler so that the scan loop can
; never run past the end of the table when entries are added or removed.
_1byteopcode dd 027h, offset _daa, 02Fh, offset _das, 037h, offset _aaa, 03Fh, offset _aas
dd 06h, offset _pushes, 07h, offset _popes, 0Eh, offset _pushcs, 016h, offset _pushss
dd 017h, offset _popss, 01Eh, offset _pushds, 01Fh, offset _popds, 09Bh, offset _wait
dd 09Eh, offset _sahf, 09Fh, offset _lahf, 0C3h, offset _retn, 0C9h,offset _leave
dd 0CBh, offset _retf
dd 0CCh, offset _int3, 0CEh, offset _into, 0D6h, offset _salc, 0D7h, offset _xlat
dd 0F1h, offset _int01, 0F4h, offset _hlt, 0F5h, offset _cmc, 0F8h, offset _clc
dd 0F9h, offset _stc, 0FAh, offset _cli, 0FBh, offset _sti, 0FCh, offset _cld
dd 0FDh, offset _std, 090h, offset _nop
_1byteopcode_count equ ($ - _1byteopcode) / 8
; Opcode bytes whose mnemonic depends on the 66h prefix. The position of a
; byte in this list is the index into _1bytetable1 / _1bytetable2.
_1bytescan db 060h, 061h, 09Ch, 09Dh, 098h, 099h
_1bytescan_count equ $ - _1bytescan
align 4
_1bytetable1 dd offset _pushad, offset _popad, offset _pushfd, offset _popfd, offset _cwde, offset _cdq
_1bytetable2 dd offset _pusha, offset _popa, offset _pushf, offset _popf, offset _cbw, offset _cwd
; Opcode byte shifted right by 3 (register number stripped) for the
; "opcode + register" forms: 40h inc, 48h dec, 50h push, 58h pop, 90h xchg.
; The position in this list is the index into _1bytetable3 / _1bytetable4.
_1byteopcode2 dd 08h, 09h, 0Ah, 0Bh, 12h
_1byteopcode2_count equ ($ - _1byteopcode2) / 4
_1bytetable3 dd offset _inc, offset _dec, offset _push, offset _pop, offset _xchgeax
_1bytetable4 dd offset _inc, offset _dec, offset _push, offset _pop, offset _xchgax
.code
;-----------------------------------------------------------------------
; x86 - decode one instruction into text (1-byte opcodes only)
;
; In:    [esp+4] = pointer to opcode to decode
;        [esp+8] = pointer to string to store decoded opcode
; Out:   eax     = size of opcode decoded (prefix bytes + opcode byte)
;                  If the opcode is not in any table, eax holds only the
;                  number of prefix bytes consumed and nothing is written
;                  to the output string.
; Stack: the routine removes its two arguments (ret 8).
; Clobb: ebx, ecx, edx, esi, edi, flags
; Needs: stringcopy(dest, src) and strcat(dest, src); both must preserve
;        all registers because eax, ebx and edx are live across the calls.
;        cmovnz is used, so the assembler must be set to a CPU that has it.
;
; Register roles: esi = current input byte, ebx = prefix flags seen so far,
;                 eax = bytes consumed so far.
;-----------------------------------------------------------------------
x86:
    mov esi,[esp+4]
    xor ebx,ebx
    xor eax,eax

; --- 1. consume every prefix byte, remembering each one as a bit in ebx ---
_begin:
    mov ecx,prefix_scan_count
    lea edi,prefix_scan
_prefixscan:
    mov dl,[edi]
    cmp [esi],dl
    jnz @F
    or ebx,[edi+4]
    inc eax
    inc esi
    jmp _begin                  ; prefixes can stack: rescan the next byte
@@:
    add edi,8
    dec ecx
    jnz _prefixscan

; --- 2. opcodes with a fixed mnemonic ---
    mov ecx,_1byteopcode_count
    lea edi,_1byteopcode
_1byte1:
    mov dl,[edi]
    cmp [esi],dl
    jnz @F
    push dword ptr [edi+4]      ; src  = mnemonic text
    push dword ptr [esp+8][4]   ; dest = caller's buffer (shifted by one push)
    call stringcopy
    inc eax
    ret 8
@@:
    add edi,8
    dec ecx
    jnz _1byte1

; --- 3. opcodes whose mnemonic depends on the operand-size prefix ---
    xor ecx,ecx                 ; ecx = index into _1bytescan
    lea edi,_1bytescan
_1byte2:
    mov dl,[esi]
    cmp [edi],dl
    jnz @F
    lea edi,_1bytetable1        ; 32-bit names by default
    lea edx,_1bytetable2
    test ebx,prefix_66h
    cmovnz edi,edx              ; 66h seen: use the 16-bit names
    push dword ptr [edi+ecx*4]  ; src = the string pointer stored in the
                                ; table slot, not the address of the slot
    push dword ptr [esp+8][4]
    call stringcopy
    inc eax
    ret 8
@@:
    inc edi
    inc ecx
    cmp ecx,_1bytescan_count
    jnz _1byte2

; --- 4. "opcode + register" forms: inc/dec/push/pop/xchg ---
    xor ecx,ecx                 ; ecx = index into _1byteopcode2
    lea edi,_1byteopcode2
_1byte3:
    movzx edx, byte ptr [esi]
    push edx                    ; keep the full opcode byte
    shr edx,3                   ; drop the register number
    cmp edx,[edi]
    jnz @F
    pop edx
    and edx, 0111y              ;last 3 bits = register
    test ebx,prefix_66h
    jz _32bit1byteopcode
    lea edi,_1bytetable4
    push dword ptr [edi+ecx*4]  ; src = mnemonic text
    push dword ptr [esp+8][4]
    call stringcopy
    lea edi,reg16
    lea edx,[edx+edx*2]         ; reg16 entries are 3 bytes apart
    add edx,edi
    push edx                    ; src = register name
    push dword ptr [esp+8][4]
    call strcat
    inc eax
    ret 8
_32bit1byteopcode:
    lea edi,_1bytetable3
    push dword ptr [edi+ecx*4]  ; src = mnemonic text
    push dword ptr [esp+8][4]
    call stringcopy
    lea edi,reg32
    shl edx,2                   ; reg32 entries are 4 bytes apart
    add edx,edi
    push edx                    ; src = selected register name (pushing edi
                                ; here would always append "eax")
    push dword ptr [esp+8][4]
    call strcat
    inc eax
    ret 8
@@:
    pop edx                     ; discard the saved opcode byte
    add edi,4
    inc ecx
    cmp ecx,_1byteopcode2_count
    jnz _1byte3

; no table matched: eax = number of prefix bytes, output string untouched
    ret 8

;-----------------------------------------------------------------------
; stringcopy - copy a zero-terminated string, terminator included
;
; In:    [esp+4] = pointer to string destination
;        [esp+8] = pointer to string source
; Out:   nothing; all general registers are preserved (pushad/popad)
; Stack: the routine removes its two arguments (ret 8).
;-----------------------------------------------------------------------
stringcopy:
    pushad
    ; pushad stored 8 dwords, so the arguments sit 4*8 bytes further up
    mov edx,[esp+4][4*8]        ; edx = destination
    mov ebx,[esp+8][4*8]        ; ebx = source
    xor ecx,ecx                 ; ecx = byte offset shared by both strings
@@:
    movzx eax, byte ptr [ebx+ecx]
    mov [edx+ecx],al
    inc ecx
    test al,al                  ; stop once the terminator has been copied
    jnz @B
    popad
    ret 8

;-----------------------------------------------------------------------
; strcat - append a zero-terminated string to another one
;
; In:    [esp+4] = pointer to string destination (zero-terminated, with
;                  room for the appended text and its terminator)
;        [esp+8] = pointer to string to add
; Out:   nothing; all general registers are preserved (pushad/popad)
; Stack: the routine removes its two arguments (ret 8).
;
; The destination is the string that is searched for its terminator and
; written to; the string to add is only read.
;-----------------------------------------------------------------------
strcat:
    pushad
    ; pushad stored 8 dwords, so the arguments sit 4*8 bytes further up
    mov edx,[esp+4][4*8]        ; edx = destination (written)
    mov ecx,[esp+8][4*8]        ; ecx = string to add (read only)
_scanfornull:
    mov al,[edx]
    inc edx
    or al,al
    jnz _scanfornull
    dec edx                     ; step back onto the destination's terminator
_copystring:
    mov al,[ecx]
    inc ecx
    mov [edx],al
    inc edx
    or al,al                    ; stop once the new terminator is written
    jnz _copystring
    popad
    ret 8
Posted on 2003-06-04 08:24:46 by roticv