Hi all,

I created this little gem during some boring classes:


int instructionCount(unsigned char *func)
{
int count = 0;

while(*func != 0xCC)
{
// Skip prefixes F0h, F2h, F3h, 66h, 67h, D8h-DFh
int operandSize = 4;
int FPU = 0;
while(*func == 0xF0 ||
*func == 0xF2 ||
*func == 0xF3 ||
(*func & 0xFE) == 0x66 ||
(*func & 0xF8) == 0xD8)
{
if(*func == 0x66)
{
operandSize = 2;
}
else if((*func & 0xF8) == 0xD8)
{
FPU = *func++;
break;
}

func++;
}

// Skip two-byte opcode byte
bool twoByte = false;
if(*func == 0x0F)
{
twoByte = true;
func++;
}

// Skip opcode byte
unsigned char opcode = *func++;

// Skip mod R/M byte
unsigned char modRM = 0xFF;
if(FPU)
{
if((opcode & 0xC0) != 0xC0)
{
modRM = opcode;
}
}
else if(!twoByte)
{
if((opcode & 0xC4) == 0x00 ||
(opcode & 0xF4) == 0x60 && ((opcode & 0x0A) == 0x02 || (opcode & 0x09) == 0x9) ||
(opcode & 0xF0) == 0x80 ||
(opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02 ||
(opcode & 0xFC) == 0xD0 ||
(opcode & 0xF6) == 0xF6)
{
modRM = *func++;
}
}
else
{
if((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D ||
(opcode & 0xF0) == 0x30 ||
opcode == 0x77 ||
(opcode & 0xF0) == 0x80 ||
(opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02 ||
(opcode & 0xF8) == 0xC8)
{
// No mod R/M byte
}
else
{
modRM = *func++;
}
}

// Skip SIB and displacement
if((modRM & 0x07) == 0x04) func += 1; // SIB
if((modRM & 0xC5) == 0x05) func += 4; // Dword displacement, no base
if((modRM & 0xC0) == 0x40) func += 1; // Byte displacement
if((modRM & 0xC0) == 0x80) func += 4; // Dword displacement

// Skip immediate
if(FPU)
{
// Can't have immediate operand
}
else if(!twoByte)
{
if((opcode & 0xC7) == 0x04 ||
(opcode & 0xFE) == 0x6A || // PUSH/POP/IMUL
(opcode & 0xF0) == 0x70 || // Jcc
opcode == 0x80 ||
opcode == 0x83 ||
(opcode & 0xFD) == 0xA0 || // MOV
opcode == 0xA8 || // TEST
opcode == 0xB0 || // MOV
(opcode & 0xFE) == 0xC0 || // RCL
opcode == 0xC6 || // MOV
opcode == 0xCD || // INT
(opcode & 0xFE) == 0xD4 || // AAD/AAM
(opcode & 0xF8) == 0xE0 || // LOOP/JCXZ
opcode == 0xEB ||
opcode == 0xF6 && (modRM & 0x30) == 0x00) // TEST
{
func += 1;
}
else if((opcode & 0xF7) == 0xC2)
{
func += 2; // RET
}
else if((opcode & 0xFC) == 0x80 ||
(opcode & 0xC7) == 0x05 ||
(opcode & 0xFE) == 0xE8 || // CALL/Jcc
(opcode & 0xFE) == 0x68 ||
(opcode & 0xFC) == 0xA0 ||
(opcode & 0xEE) == 0xA8 ||
opcode == 0xC7 ||
opcode == 0xF7 && (modRM & 0x30) == 0x00)
{
func += operandSize;
}
}
else
{
if(opcode == 0xBA || // BT
opcode == 0x0F || // 3DNow!
(opcode & 0xFC) == 0x70 || // PSLLW
(opcode & 0xF7) == 0xA4 || // SHLD
opcode == 0xC2 ||
opcode == 0xC4 ||
opcode == 0xC5 ||
opcode == 0xC6)
{
func += 1;
}
else if((opcode & 0xF0) == 0x80)
{
func += operandSize; // Jcc -i
}
}

count++;
}

return count;
}

It actually only counts the number of instructions in a binary buffer. But it's very easy to extend to a full decoder. It's this compact because it doesn't store a full instruction table at all. It actually works the way an x86 processor does its decoding. So it's also useful for a processor emulator. It has been tested extensively with, what else, SoftWire. I hope it's of use to anyone...

Enjoy!

Nicolas
Posted on 2004-11-18 22:03:14 by C0D1F1ED
very useful :)

thanks
Posted on 2004-11-19 09:21:55 by comrade
Nice.
Posted on 2004-11-19 11:00:45 by rea
heh, i just thought "i recognize this code". funny to meet your alter-ego on that other site too :)
Posted on 2004-11-19 14:37:00 by lifewire
IMO its much more efficient to compress a table index-wise, a quasi-RLE, and write a full LDE instead. my version was around ~420b, using the same table-compression technique as RGBLDE (which is by far the smallest technique yet :) ). hand-encoding the structions for a full decoder is not only bulky, but there are various discreptencies that must be dealt with as well.
Posted on 2004-11-22 22:47:38 by Drocon
00401B40 55 push ebp
00401B41 8B EC mov ebp,esp
00401B43 6A FF push 0FFh
00401B45 CC int 3

your code seems can't return the correct count of above code snippet
Posted on 2004-12-01 04:11:46 by Ares
Hi C0D1F1ED,
Here is my modification, hope I haven't made any mistake.


int instructionCount(unsigned char *func)
{
int count = 0;

while(*func != 0xCC)
{
// Skip prefixes F0h, F2h, F3h, 66h, 67h, D8h-DFh
// also need to skip segment prefixes, 2Eh,26h,36h,64h,65h
int operandSize = 4;
int FPU = 0;
while(*func == 0xF0 ||
*func == 0xF2 ||
*func == 0xF3 ||
(*func & 0xFE) == 0x66 ||
(*func & 0xF8) == 0xD8 ||
*func == 0x2E ||
*func == 0x26 ||
*func == 0x36 ||
*func == 0x64 ||
*func == 0x65
)
{
if(*func == 0x66)
{
operandSize = 2;
}
else if((*func & 0xF8) == 0xD8)
{
FPU = *func++;
break;
}

func++;
}

// Skip two-byte opcode byte
bool twoByte = false;
if(*func == 0x0F)
{
twoByte = true;
func++;
}

// Skip opcode byte
unsigned char opcode = *func++;

// Skip mod R/M byte
unsigned char modRM = 0xFF;
if(FPU)
{
if((opcode & 0xC0) != 0xC0)
{
modRM = opcode;
}
}
else if(!twoByte)
{
if((opcode & 0xC4) == 0x00 ||
(opcode & 0xF4) == 0x60 && ((opcode & 0x0A) == 0x02 || (opcode & 0x09) == 0x9) ||
(opcode & 0xF0) == 0x80 ||
(opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02 ||
(opcode & 0xFC) == 0xD0 ||
(opcode & 0xF6) == 0xF6)
{
modRM = *func++;
}
}
else
{
if((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D ||
(opcode & 0xF0) == 0x30 ||
opcode == 0x77 ||
(opcode & 0xF0) == 0x80 ||
(opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02 ||
(opcode & 0xF8) == 0xC8)
{
// No mod R/M byte
}
else
{
modRM = *func++;
}
}

// Skip SIB and displacement
if((modRM & 0x07) == 0x04 && (modRM >> 6) & 3 != 3) func += 1; // SIB
if((modRM & 0xC5) == 0x05) func += 4; // Dword displacement, no base
if((modRM & 0xC0) == 0x40) func += 1; // Byte displacement
if((modRM & 0xC0) == 0x80) func += 4; // Dword displacement

// Skip immediate
if(FPU)
{
// Can't have immediate operand
}
else if(!twoByte)
{
if((opcode & 0xC7) == 0x04 ||
(opcode & 0xFE) == 0x6A || // PUSH/POP/IMUL
(opcode & 0xF0) == 0x70 || // Jcc
opcode == 0x80 ||
opcode == 0x83 ||
(opcode & 0xFD) == 0xA0 || // MOV
opcode == 0xA8 || // TEST
opcode == 0xB0 || // MOV
(opcode & 0xFE) == 0xC0 || // RCL
opcode == 0xC6 || // MOV
opcode == 0xCD || // INT
(opcode & 0xFE) == 0xD4 || // AAD/AAM
(opcode & 0xF8) == 0xE0 || // LOOP/JCXZ
opcode == 0xEB ||
opcode == 0xF6 && (modRM & 0x30) == 0x00) // TEST
{
func += 1;
}
else if((opcode & 0xF7) == 0xC2)
{
func += 2; // RET
}
else if((opcode & 0xFC) == 0x80 ||
(opcode & 0xC7) == 0x05 ||
(opcode & 0xFE) == 0xE8 || // CALL/Jcc
(opcode & 0xFE) == 0x68 ||
(opcode & 0xFC) == 0xA0 ||
(opcode & 0xEE) == 0xA8 ||
opcode == 0xC7 ||
opcode == 0xF7 && (modRM & 0x30) == 0x00)
{
func += operandSize;
}
}
else
{
if(opcode == 0xBA || // BT
opcode == 0x0F || // 3DNow!
(opcode & 0xFC) == 0x70 || // PSLLW
(opcode & 0xF7) == 0xA4 || // SHLD
opcode == 0xC2 ||
opcode == 0xC4 ||
opcode == 0xC5 ||
opcode == 0xC6)
{
func += 1;
}
else if((opcode & 0xF0) == 0x80)
{
func += operandSize; // Jcc -i
}
}

count++;
}

return count;
}

regards,
Posted on 2004-12-02 08:12:44 by Ares
Hmm... we need three more changes...

(1)


while(*func == 0xF0 ||
*func == 0xF2 ||
*func == 0xF3 ||
(*func & 0xFE) == 0x66 ||
(*func & 0xF8) == 0xD8 ||
*func == 0x2E ||
*func == 0x26 ||
*func == 0x36 ||
*func == 0x64 ||
*func == 0x65
)

add the prefix 0x3E (DS segment override)

(2)


if((modRM & 0x07) == 0x04 && (modRM >> 6) & 3 != 3)

oops, a parenthesis:


if((modRM & 0x07) == 0x04 && (modRM >> 6 & 3) != 3)

else the code <8B 34 01> mov esi,dword ptr
is assigned 2 bytes

(3)


if((modRM & 0x07) == 0x04 && (modRM >> 6) & 3 != 3) func += 1; // SIB

should be replaced with


if((modRM & 0x07) == 0x04 && (modRM>>6 & 3) != 3
) { // SIB
unsigned char SIB = *func;
func += 1;
if ((SIB & 0x7) == 5) func += 4; // disp32
}

to take into account SIBs followed by disp32, e.g.
<8B 15 00 00 40 00> mov edx,dword ptr ds:[400000h]

Regards, bilbo

P.S. it looks like Drocon is right... google for RGBLDE.ZIP...
Posted on 2004-12-03 07:55:34 by bilbo
This is realy great code :) I've seen this a long time ago, and I was afraid that this was deleted during hack attack :(
Thanks god it is still here :)

Great job man, really great job...
Posted on 2005-03-10 18:17:57 by deroko
C0D1F1ED,

Do you have updated code after all these comments.

I made one myself but it is best if you can check and repost.

Anyway, great work.

Visu
Posted on 2005-04-09 11:19:43 by visu