I didn't really make a search for a good ASM implementation of Blowfish so I decided to convert the C implementation from http://www.schneier.com/blowfish.html and optimize it to run faster than the C implementation.

The only functions that really need optimizing are Encipher (Decipher is Encipher with the loop counter running backwards, so the two are basically identical) and F.

Here are the C implementations and my own ASM implementations of these functions.  I've done some optimizing (there are some dependencies I still need to eliminate, if possible) and I'm saving loop unrolling for last.  Does anyone see any other optimizations that could be made to this?

C:
// Number of Feistel rounds.  The assembly version of this routine walks the
// P-array up to byte offset 0x40 (16 four-byte entries), so 16 is used here
// when the surrounding code has not already defined N.
#ifndef N
#define N 16
#endif

// Subkey array: one entry per round plus two entries applied after the loop.
// Contents are set up elsewhere (not shown in this snippet).
unsigned long P[N + 2];
// Four substitution boxes of 256 entries each, indexed by one byte of the
// round input.  Contents are set up elsewhere (not shown in this snippet).
unsigned long S[4][256];

// Round function: splits X into four bytes (most significant first), looks
// each byte up in its own S-box and combines the results as
//   ((S0[a] + S1[b]) ^ S2[c]) + S3[d]
// NOTE: the arithmetic is meant to wrap at 32 bits, i.e. this assumes a
// 32-bit `unsigned long` (the assembly version uses 4-byte table entries).
inline unsigned long F( unsigned long X )
{
return ( ( ( S[0][ ( X >> 24 ) & 0xFF ] ) + S[1][ ( X >> 16 ) & 0xFF ] ) ^ S[2][ ( X >> 8 ) & 0xFF ] ) + S[3][ X & 0xFF ];
}

// Encrypts one block in place.
//   xl - pointer to the left half of the block (read, then overwritten)
//   xr - pointer to the right half of the block (read, then overwritten)
void __fastcall Blowfish_Encrypt(unsigned long *xl, unsigned long* xr) {
  unsigned long  Xl;
  unsigned long  Xr;
  unsigned long  temp;
  short      i;

  Xl = *xl;
  Xr = *xr;

  for (i = 0; i < N; ++i) {
    // Mix in this round's subkey, then fold F(Xl) into the other half.
    Xl = Xl ^ P[i];
    Xr = F(Xl) ^ Xr;

    // Swap halves for the next round.
    temp = Xl;
    Xl = Xr;
    Xr = temp;
  }

  // Undo the swap performed by the last round.
  temp = Xl;
  Xl = Xr;
  Xr = temp;

  // Apply the two remaining subkeys.
  Xr = Xr ^ P[N];
  Xl = Xl ^ P[N + 1];

  *xl = Xl;
  *xr = Xr;
}

ASM:
// Encrypts one block in place; assembly counterpart of the C Blowfish_Encrypt
// shown above (16 rounds followed by the two trailing subkeys).
//   Left  - pointer to the left half of the block  (arrives in ecx)
//   Right - pointer to the right half of the block (arrives in edx)
// The function is naked, so it saves and restores every register it touches
// itself and returns with an explicit ret.
void __declspec( naked ) __fastcall BfEncipher( unsigned long* Left, unsigned long* Right )
{
// On entry: ecx = Left, edx = Right
// Inside the loop:
//   ecx = Xl, edx = Xr, eax = F accumulator, ebp = S-box index,
//   ebx = byte offset into P, esi = address of S, edi = address of P
__asm
{
//Data setup for cipher
push eax;
push ebx;
push ecx;                               // keep the Left pointer for the store-back
push edx;                               // keep the Right pointer for the store-back
push ebp;
push esi;
push edi;
mov ecx, dword ptr [ ecx ];             // Xl = *Left
mov edx, dword ptr [ edx ];             // Xr = *Right
mov esi, offset S;
mov edi, offset P;
xor ebx, ebx;                           // P offset = 0

// Beginning of loop
ENCIPHER_BEGIN:

xor ecx, [ edi + ebx ];                 // Xl ^= P[i]
add ebx, 0x04;                          // advance to the next P entry
bswap ecx;                              // bring the two high bytes of Xl into cl / ch
movzx ebp, cl;                          // byte 3 (bits 31..24)
mov eax, [ esi + ebp * 4 ];             // eax  = S[0][byte3]
movzx ebp, ch;                          // byte 2 (bits 23..16)
add eax, [ esi + ebp * 4 + 0x0400 ];    // eax += S[1][byte2]
bswap ecx;                              // restore Xl's byte order
movzx ebp, ch;                          // byte 1 (bits 15..8)
xor eax, [ esi + ebp * 4 + 0x0800 ];    // eax ^= S[2][byte1]
movzx ebp, cl;                          // byte 0 (bits 7..0)
add eax, [ esi + ebp * 4 + 0x0C00 ];    // eax += S[3][byte0]
xor edx, eax;                           // Xr ^= F(Xl)
xchg ecx, edx;                          // swap halves for the next round
cmp ebx, 0x40;                          // 16 entries * 4 bytes consumed?
jb ENCIPHER_BEGIN;

xchg ecx, edx;                          // undo the swap done by the last round
xor edx, [ edi + 0x40 ];                // Xr ^= P[16]
xor ecx, [ edi + 0x44 ];                // Xl ^= P[17]

// Park the results in eax/ebx so ecx/edx can be reloaded with the pointers.
mov eax, ecx;
mov ebx, edx;
pop edi;
pop esi;
pop ebp;
pop edx;                                // edx = Right
pop ecx;                                // ecx = Left
mov [ ecx ], eax;                       // *Left  = Xl
mov [ edx ], ebx;                       // *Right = Xr
pop ebx;
pop eax;
ret;
}
}


Thanks a lot
-jMerliN
Posted on 2006-04-07 01:47:05 by jMerliN
2 SpooK:  ok, i'll try to be more careful...

2 jMerliN: here 2 different asm sources which i have
Posted on 2006-04-07 06:23:27 by ivan2k2