For anyone who's interested... it's small and pretty fast... not quite done yet though... the source comes with a description and the original definition code in C... bla bla and so on...

You might have to look at this TEA stuff... interesting stuff imo :).

Source might not build, as I messed up and had to restore some

files in a hurry. To get better speed, convert the source so it works

on multiple blocks of data instead of a single block at a time.

Not sure if the "key-in-code" approach is much faster, but I can't

really check until I make the code work on multiple blocks of data.

Have fun.

Source might not build, as I messed up and had to restore some

files in a hurry. To get better speed, convert the source so it works

on multiple blocks of data instead of a single block at a time.

Not sure if the "key-in-code" approach is much faster, but I can't

really check until I make the code work on multiple blocks of data.

Have fun.

fodder:

it built ok but i didn't test if it ran... anyway... very interesting...

but i think you've based that on a later version of TEA than the one i used...

hehe that means that your version is more secure and stuff like that

what i did notice is that yours is larger when it's compiled...

which kinda takes away part of the purpose behind TEA

anyway... umm i'll include a zip with a disassembly of them to compare...

both were compiled to coff object format and so on...

yours with nasm and mine with masm

blabla and so on...

it built ok but i didn't test if it ran... anyway... very interesting...

but i think you've based that on a later version of TEA than the one i used...

hehe that means that your version is more secure and stuff like that

what i did notice is that yours is larger when it's compiled...

which kinda takes away part of the purpose behind TEA

It is intended for use in applications where code size is at a premium

anyway... umm i'll include a zip with a disassembly of them to compare...

both were compiled to coff object format and so on...

yours with nasm and mine with masm

blabla and so on...

Yeah, my version is sorta fatty ;).

What about using MMX?

After several hours of my weekend:

Except, it has some bugs in it, :( so it doesn't work.

I have spent many hours searching for the bug but I cannot find it. Instead of abandoning the code, here is the encryption source so that someone can point out to me what I have screwed up :).

I see quite a lot of potential in this proc, as it is ~100 bytes and the main loop only has 17 instructions.

Except, it has some bugs in it, :( so it doesn't work.

I have spent many hours searching for the bug but I cannot find it. Instead of abandoning the code, here is the encryption source so that someone can point out to me what I have screwed up :).

I see quite a lot of potential in this proc, as it is ~100 bytes and the main loop only has 17 instructions.

```
```

;-----------------------------------------------------------------------
; TEAEncryptMMX - encrypt one 64-bit block in place with TEA, using MMX.
;
; Syntax: MASM (32-bit). Requires MMX to be enabled (.MMX) by the file.
;
; In:    v = pointer to the block: v[0] = y, v[1] = z (two dwords)
;        k = pointer to the 128-bit key: k[0]..k[3] (four dwords)
;        n = number of TEA cycles (each cycle updates y, then z);
;            n = 0 leaves the block unchanged
; Out:   the block at v is overwritten with the ciphertext
; Clobb: ecx, MM0-MM7, flags.  esi/edi are saved and restored (USES).
;        EMMS is executed before returning, so x87 code may follow.
;
; Reference algorithm, per cycle:
;        sum += delta
;        y   += ((z << 4) + k[0]) ^ (z + sum) ^ ((z >> 5) + k[1])
;        z   += ((y << 4) + k[2]) ^ (y + sum) ^ ((y >> 5) + k[3])
;
; The z update consumes the y that was just produced, so the two
; half-rounds cannot be evaluated side by side on a single block.  The
; two MMX lanes are instead used to add both keys of one half-round in
; a single PADDD.
;
; Register roles (comments use "high dword,low dword" notation; only the
; low dword of MM0, MM1, MM6 and MM7 is meaningful):
;        MM0 = y                    MM1 = z
;        MM5 = k[1],k[0]            MM2 = k[3],k[2]
;        MM6 = sum                  MM7 = delta
;        MM3, MM4 = scratch
;-----------------------------------------------------------------------

.DATA

delta QWORD 09e3779b99e3779b9h          ; TEA delta (same value in both dwords)

.CODE

TEAEncryptMMX PROC USES esi edi v:DWORD,k:DWORD,n:DWORD

        mov     esi,v
        mov     edi,k
        mov     ecx,n

        movd    MM0,DWORD PTR [esi]     ; MM0 = 0,y
        movd    MM1,DWORD PTR [esi + 4] ; MM1 = 0,z
        movq    MM5,QWORD PTR [edi]     ; MM5 = k[1],k[0]
        movq    MM2,QWORD PTR [edi + 8] ; MM2 = k[3],k[2]
        pxor    MM6,MM6                 ; sum = 0
        movq    MM7,delta

        .WHILE ecx != 0                 ; tested before the first pass, so n = 0 is safe

            paddd   MM6,MM7             ; sum += delta

            ; ---- y += ((z << 4) + k[0]) ^ (z + sum) ^ ((z >> 5) + k[1])
            movq    MM3,MM1
            movq    MM4,MM1
            pslld   MM3,4               ; MM3 = ?,z << 4
            psrld   MM4,5               ; MM4 = ?,z >> 5
            punpckldq MM3,MM4           ; MM3 = z >> 5,z << 4
            paddd   MM3,MM5             ; MM3 = (z >> 5) + k[1],(z << 4) + k[0]
            movq    MM4,MM1
            paddd   MM4,MM6             ; MM4 = ?,z + sum
            pxor    MM4,MM3             ; low = (z + sum) ^ ((z << 4) + k[0])
            psrlq   MM3,32              ; MM3 = 0,(z >> 5) + k[1]
            pxor    MM4,MM3             ; low = complete mixing term
            paddd   MM0,MM4             ; y += term

            ; ---- z += ((y << 4) + k[2]) ^ (y + sum) ^ ((y >> 5) + k[3])
            ; uses the y updated just above
            movq    MM3,MM0
            movq    MM4,MM0
            pslld   MM3,4               ; MM3 = ?,y << 4
            psrld   MM4,5               ; MM4 = ?,y >> 5
            punpckldq MM3,MM4           ; MM3 = y >> 5,y << 4
            paddd   MM3,MM2             ; MM3 = (y >> 5) + k[3],(y << 4) + k[2]
            movq    MM4,MM0
            paddd   MM4,MM6             ; MM4 = ?,y + sum
            pxor    MM4,MM3             ; low = (y + sum) ^ ((y << 4) + k[2])
            psrlq   MM3,32              ; MM3 = 0,(y >> 5) + k[3]
            pxor    MM4,MM3             ; low = complete mixing term
            paddd   MM1,MM4             ; z += term

            dec     ecx

        .ENDW

        movd    DWORD PTR [esi],MM0     ; v[0] = y
        movd    DWORD PTR [esi + 4],MM1 ; v[1] = z

        emms                            ; release the x87 register file
        ret

TEAEncryptMMX ENDP

I haven't tested it, but this is a translation of the inner loop of

**f0dder**'s code. The KEY structure is an array of qwords that should be in the cache - they need to be dword interleaved: 010123234545...```
teaEncodeMMX:
```

;-----------------------------------------------------------------------
; teaEncodeMMX - run the MMX mixing loop over 16 bytes of data in place.
;
; Stack arguments on entry (removed by the callee via "ret 12"):
;        [esp+4]  = loop counter start, (number of rounds - 1)*2
;        [esp+8]  = pointer to the KEY table (qword entries)
;        [esp+12] = pointer to the data: data0 = first qword,
;                   data1 = second qword
; Out:   both data qwords are overwritten with the result
; Clobb: eax, ecx, edx, mm0-mm3, flags
;
; Every packed operation works on two independent dwords per register.
; Each pass consumes two consecutive KEY qwords, at [edx+ecx*8] and
; [edx+ecx*8+8]; ecx steps down by 2 until it goes negative, so the
; table is walked from its highest used entry toward entry 0.
;
; NOTE(review): the formulas quoted below mention "sum" and KEY[...],
; but the code only XORs in one table qword per step - presumably the
; table holds those terms pre-combined; confirm against the table
; builder.
; NOTE(review): no EMMS is executed here; the caller must issue one
; before any x87 code runs.
;-----------------------------------------------------------------------

        mov     ecx, [esp+4]            ; (number of rounds - 1)*2
        mov     edx, [esp+8]            ; pointer to KEY
        mov     eax, [esp+12]           ; pointer to data

        movq    mm1, [eax+8]            ; data1
        movq    mm0, [eax]              ; data0

@@:
        ; data0 += ((data1 << 4) ^ (data1 >> 5)) + data1 ^ sum + KEY[sum & 3];
        movq    mm3, mm1                ; copy of data1 for the right shift
        movq    mm2, mm1                ; copy of data1 for the left shift
        psrld   mm3, 5
        pslld   mm2, 4
        pxor    mm2, mm3                ; (data1 << 4) ^ (data1 >> 5)
        paddd   mm2, mm1                ; ... + data1
        pxor    mm2, [edx+ecx*8]        ; ... ^ first table qword of this pass
        paddd   mm0, mm2                ; data0 += term

        ; data1 += ((data0 << 4) ^ (data0 >> 5)) + data0 ^ sum + KEY[(sum >> 11) & 3];
        movq    mm3, mm0                ; copy of the updated data0 (right shift)
        movq    mm2, mm0                ; copy of the updated data0 (left shift)
        psrld   mm3, 5
        pslld   mm2, 4
        pxor    mm2, mm3                ; (data0 << 4) ^ (data0 >> 5)
        paddd   mm2, mm0                ; ... + data0
        pxor    mm2, [edx+ecx*8+8]      ; ... ^ second table qword of this pass
        paddd   mm1, mm2                ; data1 += term

        sub     ecx, 2                  ; step back one pair of table entries
        jns     @B                      ; repeat while the index is still >= 0

        movq    [eax+8], mm1            ; write data1 back
        movq    [eax], mm0              ; write data0 back

        ret     12

; KEY table layout consumed by teaEncodeMMX: reverse order, dword
; interleaved (each letter below stands for one dword).
; teaEncodeMMX reads the entries at [edx+ecx*8] and [edx+ecx*8+8] with
; ecx counting down, so the entries at the end of the table are used
; first.
; NOTE(review): this line is a schematic - no data directive is given,
; so it is presumably not meant to be assembled as written.  The exact
; per-lane meaning of the interleaving could not be confirmed from the
; code; verify it against the routine that builds the table.

;example layout for 16 rounds...

KEY: EFEF,CDCD,ABAB,8989,6767,4545,2323,0101

Of course, double it over - we are only using half the MMX regs - do 32 bytes at a time, and interleave all the code. I left it this way for debugging...:)