For anyone who's interested: it's small and pretty fast, though not quite done yet. The source comes with a description and the original definition code in C... bla bla and so on...
Posted on 2001-12-20 16:38:02 by NervGaz
You might have to look at this TEA stuff... interesting stuff imo :).
Source might not build, as I messed up and had to restore some
files in a hurry. To get better speed, convert the source so it works
on multiple blocks of data instead of a single block at a time.

Not sure if the "key-in-code" approach is much faster, but I can't
really check until I make the code work on multiple blocks of data.

Have fun.
Posted on 2001-12-20 22:29:28 by f0dder
fodder:
it built ok but i didn't test if it ran... anyway... very interesting...
but i think you've based that on a later version of TEA than the one i used...
hehe that means that your version is more secure and stuff like that
what i did notice is that yours is larger when it's compiled...
which kinda takes away part of the purpose behind TEA
It is intended for use in applications where code size is at a premium

anyway... umm i'll include a zip with a disassembly of them to compare...
both were compiled to coff object format and so on...
yours with nasm and mine with masm

blabla and so on...
Posted on 2001-12-21 02:43:35 by NervGaz
Yeah, my version is sorta fatty ;).
Posted on 2001-12-21 02:48:37 by f0dder
What about using MMX?
Posted on 2001-12-21 09:45:05 by bitRAKE
After several hours of my weekend:
Except, it has some bugs in it, :( so it doesn't work.
I have spent many hours searching for what is wrong but I cannot find what is wrong. Instead of abandoning it here is the encryption source for someone to point out to me what I have screwed up on :).

I see quite a lot of potential in this proc, as it is ~100 bytes and the main loop only has 17 instructions.




;-----------------------------------------------------------------------
; TEAEncryptMMX - encrypt one 64-bit block in place with TEA, using MMX.
;
;   v -> two dwords, v[0] = y and v[1] = z; overwritten with the result
;   k -> four key dwords k[0..3]
;   n =  number of rounds; n = 0 leaves the block unchanged
;
; One round, as implemented below:
;   sum += delta
;   y   += ((z << 4) + k[0]) ^ (z + sum) ^ ((z >> 5) + k[1])
;   z   += ((y << 4) + k[2]) ^ (y + sum) ^ ((y >> 5) + k[3])
; The z update consumes the y produced by the line above it, so the two
; halves cannot be computed in parallel from the same input; they are
; evaluated one after the other and only the lane that is valid for the
; current half is folded back into the block.
;
; Register roles (written hi:lo, i.e. bits 63..32 : bits 31..0):
;   mm0 = z:y         current block
;   mm1 = k[1]:k[3]   keys added to the ">> 5" term
;   mm2 = k[0]:k[2]   keys added to the "<< 4" term
;   mm6 = sum:sum
;   mm7 = delta:delta
;   mm3, mm4, mm5 = scratch
;
; Clobbers eax, ecx, edx, flags and mm0-mm7. EMMS is executed before
; returning, so the caller may use x87 code straight away.
;-----------------------------------------------------------------------
TEAEncryptMMX PROC v:DWORD,k:DWORD,n:DWORD
.DATA
delta QWORD 09e3779b99e3779b9h
.CODE

    mov     eax, v                      ; eax = block pointer (kept for the final store)
    mov     edx, k
    movq    mm0, QWORD PTR [eax]        ; mm0 = z:y
    movq    mm5, QWORD PTR [edx + 0]    ; mm5 = k[1]:k[0]
    movq    mm2, QWORD PTR [edx + 8]    ; mm2 = k[3]:k[2]
    movq    mm1, mm2                    ; mm1 = k[3]:k[2]
    punpckldq mm2, mm5                  ; mm2 = k[0]:k[2]
    punpckhdq mm1, mm5                  ; mm1 = k[1]:k[3]
    pxor    mm6, mm6                    ; sum = 0 in both lanes
    movq    mm7, delta                  ; mm7 = delta:delta

    mov     ecx, n
    .IF ecx != 0                        ; without this, n = 0 would wrap and run 2^32 rounds
    .REPEAT
        paddd   mm6, mm7                ; sum += delta

        ; ---- first half: y += f(z, k[0], k[1]) -- result lives in the HIGH lane
        movq    mm3, mm0
        movq    mm4, mm0
        movq    mm5, mm0
        psrld   mm3, 5                  ; z >> 5        : y >> 5
        pslld   mm4, 4                  ; z << 4        : y << 4
        paddd   mm3, mm1                ; (z>>5)+k[1]   : (y>>5)+k[3]
        paddd   mm4, mm2                ; (z<<4)+k[0]   : (y<<4)+k[2]
        paddd   mm5, mm6                ; z+sum         : y+sum
        pxor    mm4, mm3
        pxor    mm4, mm5                ; high lane = increment for y
        psrlq   mm4, 32                 ; 0 : increment -> lines up with y, leaves z alone
        paddd   mm0, mm4                ; mm0 = z : y'

        ; ---- second half: z += f(y', k[2], k[3]) -- result lives in the LOW lane
        movq    mm3, mm0
        movq    mm4, mm0
        movq    mm5, mm0
        psrld   mm3, 5                  ; z >> 5        : y' >> 5
        pslld   mm4, 4                  ; z << 4        : y' << 4
        paddd   mm3, mm1                ; (z>>5)+k[1]   : (y'>>5)+k[3]
        paddd   mm4, mm2                ; (z<<4)+k[0]   : (y'<<4)+k[2]
        paddd   mm5, mm6                ; z+sum         : y'+sum
        pxor    mm4, mm3
        pxor    mm4, mm5                ; low lane = increment for z
        psllq   mm4, 32                 ; increment : 0 -> lines up with z, leaves y' alone
        paddd   mm0, mm4                ; mm0 = z' : y'

        dec     ecx
    .UNTIL ZERO?
    .ENDIF

    movq    QWORD PTR [eax], mm0        ; v[0] = y, v[1] = z
    emms                                ; release the MMX/x87 register file
    ret
TEAEncryptMMX ENDP
Posted on 2001-12-22 22:32:29 by huh
I haven't tested it, but this is a translation of the innerloop of f0dder's code. The KEY structure is an array of qwords that should be in the cache - they need to be dword interleaved: 010123234545...
;-----------------------------------------------------------------------
; teaEncodeMMX - MMX inner loop operating in place on 16 bytes of data.
;
; Stack arguments (the routine pops all 12 bytes itself via "ret 12"):
;   [esp+4]  = (number of rounds - 1)*2 ; starting qword index into KEY
;   [esp+8]  = pointer to KEY
;   [esp+12] = pointer to data ; qwords at +0 and +8 are read and rewritten
;
; Per iteration (ecx = current index, stepped down by 2 until negative):
;   mm0 += (((mm1 << 4) ^ (mm1 >> 5)) + mm1) ^ KEY[ecx]
;   mm1 += (((mm0 << 4) ^ (mm0 >> 5)) + mm0) ^ KEY[ecx + 1]
; with all shifts and adds done per dword lane. KEY entries are qwords and
; are consumed from the highest index down to index 0.
; NOTE(review): each KEY qword presumably already holds "sum + key word"
; for its half-round, since the loop never adds sum itself - confirm
; against whatever builds the table.
;
; A negative starting index performs no iterations (data is stored back
; unchanged) instead of reading in front of the KEY table.
;
; Clobbers eax, ecx, edx, flags and mm0-mm3. EMMS is executed before
; returning so the caller can use x87 code afterwards.
;-----------------------------------------------------------------------
teaEncodeMMX:

    mov     eax, [esp+12]               ; pointer to data
    mov     edx, [esp+8]                ; pointer to KEY
    movq    mm0, [eax]
    movq    mm1, [eax+8]
    mov     ecx, [esp+4]                ; (number of rounds - 1)*2
    test    ecx, ecx
    js      teaEncodeMMX_done           ; nothing to do for a negative index

teaEncodeMMX_round:
    ; mm0 += (((mm1 << 4) ^ (mm1 >> 5)) + mm1) ^ KEY[ecx]
    movq    mm2, mm1
    movq    mm3, mm1
    pslld   mm2, 4
    psrld   mm3, 5
    pxor    mm2, mm3
    paddd   mm2, mm1
    pxor    mm2, [edx+ecx*8]
    paddd   mm0, mm2

    ; mm1 += (((mm0 << 4) ^ (mm0 >> 5)) + mm0) ^ KEY[ecx + 1]  (uses the updated mm0)
    movq    mm2, mm0
    movq    mm3, mm0
    pslld   mm2, 4
    psrld   mm3, 5
    pxor    mm2, mm3
    paddd   mm2, mm0
    pxor    mm2, [edx+ecx*8+8]
    paddd   mm1, mm2

    sub     ecx, 2                      ; one iteration consumes two KEY qwords
    jns     teaEncodeMMX_round

teaEncodeMMX_done:
    movq    [eax], mm0
    movq    [eax+8], mm1
    emms                                ; release the MMX/x87 register file
    ret     12

;reverse dword interleaved (each letter is a dword)
;example layout for 16 rounds...
KEY: EFEF,CDCD,ABAB,8989,6767,4545,2323,0101
Of course, double it over - we are only using half the MMX regs - do 32 bytes at a time, and interleave all the code. I left it this way for debugging...:)
Posted on 2001-12-23 02:29:31 by bitRAKE