Hi guys,

I've written a hashfunction which creates a 32-byte hash out of a password.
Are there any ways i can test it for collisions etc?
I want to implement this function in an encryption program i am writing, so all ideas are welcome!

here's the function:

HashPassword proc inputpassword:DWORD, outputbuffer:DWORD
    LOCAL pwsize:DWORD

    invoke lstrlen,inputpassword
    .if eax==0
        dec eax
        ret
    .endif
    mov pwsize,eax
    xor eax,eax
    push inputpassword
    pop esi
    mov al,byte ptr
    mov ah,byte ptr
    bswap eax
    mov al,byte ptr
    mov ah,byte ptr
    mov ecx,78277783
    mul ecx
    xor eax,2742303
    mov ecx,20743729
    xor edx,edx
    div ecx
    bswap eax
    mul edx
    push outputbuffer
    pop edi
    mov dword ptr ,eax
    mov ecx,10757
    xor edx,edx
    div ecx
    bswap eax
    mul edx
    mov dword ptr ,eax
    mov ecx,77392473
    mul ecx
    mov cx,word ptr
    xor eax,ecx
    mov dword ptr ,eax
    bswap eax
    mov ecx,3297329
    mul ecx
    shl eax,3
    mov ecx,579362
    xor edx,edx
    div ecx
    xor ecx,ecx
    mov cl,byte ptr
    add eax,ecx
    bswap eax
    add eax,edx
    bswap eax
    mov dword ptr ,eax
    xor eax,24704739
    mov ecx,137
    mul ecx
    xor edx,edx
    bswap eax
    div ecx
    add eax,edx
    mov dword ptr ,eax
    mov ecx,13362972
    xor edx,edx
    div ecx
    add eax,2739384
    xor eax,edx
    rol eax,3
    sub eax,ecx
    mov ecx,dword ptr
    bswap ecx
    xor eax,ecx
    mov dword ptr ,eax
    mov ecx,dword ptr
    sub eax,ecx
    mov byte ptr ,al
    xor byte ptr ,ah
    bswap eax
    mov ecx,1337927
    mul ecx
    xchg eax,ecx
    xor edx,edx
    div cx
    add eax,ecx
    mov dword ptr ,eax
    mov ecx,dword ptr
    xor eax,ecx
    mul edx
    bswap eax
    and eax,208343
    mov dword ptr ,eax
    mov ecx,dword ptr
    xor eax,ecx
    bswap eax
    mov dword ptr ,eax
    mov ecx,dword ptr
    xor ecx,12237492
    mul ecx
    mov dword ptr ,eax
    mov ecx,dword ptr
    xchg eax,ecx
    xor edx,edx
    mov ebx,231773
    div ebx
    shl edx,4
    add eax,edx
    xor eax,ecx
    mov dword ptr ,eax
    mov ecx,dword ptr
    bswap ecx
    mul ecx
    shr ecx,8
    xor edx,edx
    div ecx
    xor ah,dl
    xor al,dh
    mov ecx,379
    mul ecx
    add eax,dword ptr
    xor eax,ebx
    mov dword ptr ,eax
    mov ebx,dword ptr
    xor eax,ebx
    mov dword ptr ,eax
    mov ebx,dword ptr
    xor edx,edx
    mul ebx
    add dword ptr ,eax
    xor dword ptr ,eax
    mov ecx,2023855
    mul ecx
    xor dword ptr ,eax
    mov ecx,dword ptr
    shl ecx,2
    xor eax,ecx
    mov dword ptr ,eax
    mov ecx,pwsize
    .if ecx<2
        xor eax,eax
        ret
    .endif
    inc esi
    dec ecx

hashloop:
    xor ebx,ebx
    mov bl,byte ptr
    mul ebx
    mov ebx,dword ptr
    xor eax,ebx
    mov dword ptr ,eax
    push eax
    mov eax,dword ptr
    mov ebx,3027347
    xor edx,edx
    div ebx
    pop ebx
    add ebx,edx
    xor eax,ebx
    bswap eax
    add dword ptr ,eax
    sub eax,27239
    mov edx,dword ptr
    bswap edx
    mul edx
    xor eax,edx
    mov edx,dword ptr
    xor eax,edx
    mov dword ptr,eax
    mov edx,dword ptr
    xor eax,edx
    mov ebx,2037275
    xor edx,edx
    div ebx
    xor edx,dword ptr
    xor eax,edx
    bswap eax
    shr edx,4
    mul edx
    mov ebx,dword ptr
    bswap ebx
    xor eax,ebx
    mov dword ptr ,eax
    mov ebx,dword ptr
    xor eax,ebx
    xor edx,edx
    mov ebx,984379
    div ebx
    mov ebx,dword ptr
    add ebx,edx
    xor eax,ebx
    bswap eax
    sub eax,ecx
    mov ebx,dword ptr
    xor eax,ebx
    mov dword  ptr ,eax
    mov ebx,dword ptr
    push ebx
    xor eax,ebx
    xor ebx,ebx
    mov bx,word ptr
    xor edx,edx
    div ebx
    pop ebx
    xor ebx,edx
    bswap ebx
    xor eax,ebx
    mov dword ptr ,eax
    add ebx,dword ptr
    xchg ebx,eax
    mul ecx
    bswap ebx
    mul ecx
    xor eax,ebx
    mov ebx,dword ptr
    xor eax,ebx
    mov dword ptr ,eax
    sub ebx,dword ptr
    add ebx,edx
    xor edx,edx
    mov dx,word ptr
    mul edx
    bswap eax
    xor eax,ebx
    mov dword ptr ,eax 
    inc esi
    dec ecx
    jnz hashloop
    xor eax,eax
    ret
HashPassword endp

I know it's a bit long, but i think it is could work.
I've written a program around it which generates a hash + password to a file like:

a-<hash>
b-<hash>
...
aa-<hash>
ab-<hash>
...
aaa-<hash>
...
zzzz-<hash>

I've did this until zzzz. Then i've imported the complete textfile (part by part) into an excel file with 1 DWORD value per cel.
Then i sorted all hashes per dword and checked if in any place there were 2 and the same dword values. in 65535 different hashes no one dword value in one row is the same.
So this should be promising.

Attached you will find the source for a file which prints the hash to StdOut and takes a password as commandline parameter.
Any ideas, comments on the alghoritm are highly appreciated since i'm still pretty new to cryptography.

Thanks in advance.

Kind regards,

Mark
Attachments:
Posted on 2006-06-22 11:21:14 by white scorpion
It would appear, then, that you do not need to worry about collisions.

Paul
Posted on 2006-06-22 11:27:12 by PBrennick
Hey,

how is it going?
How did the surgery went? I hope you are fine!

As for the algo, i've tested it with 65535 different hashes at a time. However, i am unable to test all the hashes that can be created and therefore unable to know what the collision rate would be.
I know that for every hash in md5 you should have 3-4 different passwords (if i remember correctly).
I'm wondering what that would be in this case and how one would calculate that.
Posted on 2006-06-22 13:48:33 by white scorpion
There is a great page (got this from Mark Larson over at the other place a few years back) about hash functions... take a look at:

http://burtleburtle.net/bob/hash/doobs.html

Not sure it will help you here, but for furture reference, it might help.

Ossa
Posted on 2006-06-22 14:40:08 by Ossa
Thanks, it's an interesting read indeed. But like you said, i haven't got a clue how to apply it here.
I don't really care about speed since it only has to hash once for every password / file that is encrypted.
So if the hash would take 0.2 or 0.9 seconds to complete isn't really important.

The first function i created for this wasn't a hash function and made the same output for 'a' as for 'aa' as for 'aaa' etc.
This one doesn't.
The result of a-z is this:

a-4476e474:05da931d:28dabc1e:d3f962f9:af94fb7f:d7d2ade8:7495cd20:1c5c539b
b-d3b976b6:59a80a92:d8a82389:47a5d2e6:33f2459a:de88be46:05068d0a:5be1d559
c-aa44b116:e8f0464d:66f07402:bf9167ef:f8e45263:665edb6e:db31ce46:807e97d9
d-aff47384:744bcfe3:fb4bd081:8af8b2ba:3b695720:f8f96321:2f9c3b67:baea0fb8
e-ccb4f218:245c8825:175cad64:7d6704b3:f3ba7214:11f90f8c:2155161a:759a8257
f-45a453b1:a317b596:0517b0ce:ffa48ff4:a7053225:faf79e04:36300e37:78c6885c
g-85cfc506:aa0a951f:5c0a9d5c:b155e0ea:2575d59b:5ccc75be:620e9722:8f686559
h-b566f49c:ebd831fc:7bd8117b:b7e23169:c426935a:7b040f13:f6f18ace:12eecb21
i-a9b463d1:e7e863af:d4e86870:602ac62a:0a93c89d:d57cae6a:bb9fda67:419c99fa
j-5e3bf9b0:966d7cb7:776d7ada:4087d43b:bbc14888:74ad08bd:bd5f1eb9:bd93c79f
k-fb22139e:d315425a:a3154eb2:a8b871ad:93b888be:5ca1af73:75f59f12:bb20dca0
l-b43d63d4:03304ed1:dc3060c1:111cce64:9743175b:d8399ca9:8bbf2f0b:dfe142de
m-0d5ef6e0:a7775863:75776eaa:5044ec79:4b0229e6:73bf77f6:a11018a5:2dbc6f2b
n-42d4dc50:460293de:7402a589:a173422b:60bce9ae:74333246:2361f44e:5c5348bf
o-ee13a0f8:aab6a839:b3b690bf:6fdce2ea:ab54ecf5:b61d2570:a5b8b249:b54a6af1
p-0644b1f8:b1be9edc:b9beae5a:b4da3fac:6c93284b:bbdc6101:13f37487:c64aeeb3
q-72ecf3e4:03677b09:76677405:c78b2f54:fa50f7df:749918ed:edb4c67c:d9ed056b
r-ac180a78:2eebf354:87ebd5de:299bb2eb:a6bd7039:862dfcaa:b063c94a:dad04887
s-80752c52:64d5d7f8:6cd5ffb3:93e339b7:bd2e9aef:6da172cf:ef5826fb:e9c1da2d
t-6758a266:e5210867:11211af9:52405578:4eb03490:11a6f1d5:0c3bafc9:bb843b9a
u-3de9dc43:52f856df:29f87b12:be64ce37:369c88ad:d694e08b:c326dbd3:0ec353d8
v-f5d474b0:cf6fa883:af6fbd60:0fcafc47:f4c89680:accf7585:23976332:1da041d7
w-6d648ba0:a15135f2:17513808:8175e3f9:b4359647:16bed0be:aca665cf:8041d52f
x-ea946baa:775f3ba1:705f32f9:2dc0f2b9:6ab4f2f3:74272e4f:45ee2083:403428f0
y-7d03f6c8:5ae70b5b:e0e7236e:d8c00be3:a0ff570d:e040f044:ee74f91d:c9d676ba
z-cccbacc8:2be707ce:66e71762:aab4e850:5fed60e2:678702df:47aec6ff:d68732c8


The result of a string of the same characters (a - aaaaaaaaaa) is also quite nice:

a-4476e474:05da931d:28dabc1e:d3f962f9:af94fb7f:d7d2ade8:7495cd20:1c5c539b
aa-0f19b52d:cfefc12c:db8108f8:25e44704:c76e1a2f:9726c3ef:c870a0d5:74baf800
aaa-4b0164ca:c603d377:20e56799:45c1dd5c:a1e6f32c:0cd47a46:c723b2fd:663143f2
aaaa-2a4e51be:8456ee1a:d1b2b3dc:e29e488a:37e43f93:db155800:c96c7f6b:05113a7e
aaaaa-0b2c2074:2ce44ec0:a376d69b:4f1ddd2c:fa92edeb:5358ddbb:f4762eb6:4b1bf436
aaaaaa-7eae3c89:b30529c1:e2fd5063:fc4bcd88:e3f8cb46:a83c75ef:81cc1f8f:4ea344b7
aaaaaaa-61725811:d4666307:59b4ee4e:dc922c9f:3ad23a99:21c1fda0:8344c6c2:4e053da9
aaaaaaaa-0ea590e0:2112b794:c710033d:64ef7ebf:2202702a:13200547:bb384220:59be6b51
aaaaaaaaa-905edc08:b9df2837:8abbf100:27acffe4:4864a2d4:73d86205:aa8acf08:d8e1d60c
aaaaaaaaaa-c90c4863:4a06ef6e:4ae9ec07:2ca30e86:a6efa5eb:f351c386:89f60d1e:fa58e138


I've added the : sign between every dword so it is more easy to import them to excel or compare them one dword at a time.
As you can see the result looks promising (IMO), but i'm still not sure how to test it to be absolutely sure.
The biggest thing i'm worried about is that this hash is going to be the input of a function which generates a double byte stream to pick bytes out of a table file which are used to XOR the bytes in a plaintext file.
It would be useless if there are too many passwords generating the same hash, since it would weaken the complete program.
Posted on 2006-06-23 00:19:59 by white scorpion
Remember that there's two categories of hashes. One is used for hash table lookups etc., and the main focus on those is speed, while having a fairly low amount of collisions.

The other category is cryptographically secure hashes, where you need to have as few collisions as possible, and avoid a number of other problems. Speed doesn't matter nearly as much as security.

It sounds like you're trying to construct a secure hash. My advice would be: don't, and use one of the existing algorithms that have been attacked to death by everybody and their dog.

Also for collision testing, it's sortof useless to test on strings like that. Google and download some dictionaries for various languages and see what happens on those.
Posted on 2006-06-23 06:50:16 by f0dder
I'm indeed writing a secure hash, and i already was advised not to write one on my own, but i still want to write one on my own.
The complete program will be fully written in my code, no code is borrowed from anyone (except the GUI part).
I know i will probably end up spending a whole lot of time optimizing the alghoritms, but  IMO it's worth the result.
I will use some dictionary files to run through the program, that's no problem, but when i do have the hashes in a file, how do i compare so many hashes?
Excel is worthless, i already noticed that.
Do you have any ideas for this?

Posted on 2006-06-23 11:53:49 by white scorpion

I'm indeed writing a secure hash, and i already was advised not to write one on my own, but i still want to write one on my own.

:) - it's an interesting project, but I wouldn't trust myself to write a secure hash before doing a LOT of research.


I will use some dictionary files to run through the program, that's no problem, but when i do have the hashes in a file, how do i compare so many hashes?
Excel is worthless, i already noticed that.
Do you have any ideas for this?

Good question, really.

Perhaps looking at the entropy of the output list of hashes is a good idea? Also, try sorting the {input,hash} pairs by {hash} - if it looks like some items are sorted by {input}, then the hash isn't good.

Hm.
Posted on 2006-06-23 12:04:18 by f0dder
That's the idea behind it, but how would i write a program that can handle that many items to sort?
excel can handle 65535 different items and thats not sufficient.
I think i would need a whole lot of memory, or an (almost) endless loop to sort such a thing.
Any ideas?
Posted on 2006-06-26 00:33:46 by white scorpion
It takes about 50 seconds to sort 10^7 dwords using Shellsort on 933mhz PIII. With a faster algo and cpu it would be possible to sort in a reasonable time pretty larger number of hashes. The problem will be checking all those values visually afterwards.  :shock:

As for writing the program.. Here is one solution:
Allocate two blocks of memory. Put all the string you want to hash in the first block. Hash a string and put the hash value along with a pointer to the original string in the second block. Sort the hashes while moving the pointer accordingly. Obviously you'll need to implement some progie to display the result (breaking the second block into smaller parts could help).
Posted on 2006-06-26 08:45:26 by arafel
Thanks for the explanation, but i just realized that the windows sort program should be able to help me sort them.
Indeed, the manual checking will be a pain in the butt, but it's worth it...
I'll let you know the results.

Posted on 2006-06-26 12:28:53 by white scorpion
There's a really good website for sorting algorithms, they're in java, but that's an easy enough syntax to grasp, even if you don't know the language itself.
http://www.cs.ubc.ca/~harrison/Java/sorting-demo.html

This details many different sorting algorithms, and even gives a visual representation of them, and their comperative speeds, also note that these speeds are much slower than they would be without any visual representation. The Fast Quick Sort on that page is the fastest sort algo they have there, and it's pretty damned fast, so if you want to speed up the sorting thyere, you should definately look into using that algo.

http://en.wikipedia.org/wiki/Sort_algorithm#Quicksort gives some nice information regarding what exactly you're doing, and what the ideal speed of a quicksort is.

http://en.wikipedia.org/wiki/Big_O_notation explains the notation of O(log n) and the other efficiancy info there.
Posted on 2006-06-26 13:59:29 by Bobbias
Thanks, i will check them some other time.
I've used the windows sort function which took about 15 minutes to sort a 3.4GB file, so that's sufficient for now.

I just tested the hash extensively with a lot of words.

Here's what i did:

i've downloaded every dictionary from http://www.picozip.com/prt/index.html
I've added all the dictionaries together to one big dictionary. Total size: 49MB

I have to say i had to build on extra check into the hash because with 1 inputted word the result was a devide by 0 which crashed the program.
After the updated code i ran the following tests:

I wrote a program which would hash every word in that dictfile and outputted it to
another file in the format <hash>-<word>

a total of 4,485,600 words (3.4GB file!)
Then i sorted the complete file using the windows sort.exe tool by hash.

After this i wrote another program which compared every line to the next.
-if they are the same then skip the line.
-if they are not the same, then compare only the hash.
-if hashes match then print them to stdout.

This resulted in 0 double hashes.

So in this case you would say that with 4,5 million tested passwords the hash is quite nice ;)
Posted on 2006-06-26 16:14:50 by white scorpion
Wow, sounds like your hash is shaping up quite nicely.
Posted on 2006-06-26 17:14:14 by Bobbias
Yes indeed.
I needed a way to use the inputted password for generating a stream of pseudo random dwords to pick bytes from the table file.
The first one i wrote was a simple additional function:
'passwordpasswordpasswordpass' etc...
of course this wasn't sufficient so i decided to write something else, which is this function.
I never tried to make it this efficient, but as time progresses it might be worth doing so. :D
Posted on 2006-06-27 02:01:13 by white scorpion
Anyone has any other ideas to test the alghoritm?
Any input is welcome since i'm fresh out of ideas...
Posted on 2006-06-28 10:03:39 by white scorpion
What about "reversing" the hash?
Posted on 2006-06-29 02:47:48 by roticv
IMO that's not possible, at least not completely since some bytes are dropped during the calculations which makes you miss some parts.

But even if it would be possible, for the use of the hash in my program that doesn't matter since the hash will never be known.
Posted on 2006-06-29 10:28:10 by white scorpion
little late on the reply, but ...

couple of things i'd do:

1. give c/pseudo code for the hash function
2. track byte changes through the algorithm
3. try all common attack vectors on your algorithm (google for them)
4. post your algorithm for public review
5. prepare submission for NIST hashing comp: http://www.csrc.nist.gov/pki/HashWorkshop/index.html
6. profit :)

i think the most intersting steps will be to publish the algorithm in c form and let others consider the possibility of reveresing it.
Posted on 2006-07-30 20:18:52 by abc123
There is a div by zero exception if i try to hash the password "aaaapjyd"

hashloop:
    xor ebx,ebx
    mov bl,byte ptr
    mul ebx
    mov ebx,dword ptr
    xor eax,ebx
    mov dword ptr ,eax
    push eax
    mov eax,dword ptr
    mov ebx,3027347
    xor edx,edx
    div ebx
    pop ebx
    add ebx,edx
    xor eax,ebx
    bswap eax
    add dword ptr ,eax
    sub eax,27239
    mov edx,dword ptr
    bswap edx
    mul edx
    xor eax,edx
    mov edx,dword ptr
    xor eax,edx
    mov dword ptr,eax
    mov edx,dword ptr
    xor eax,edx
    mov ebx,2037275
    xor edx,edx
    div ebx
    xor edx,dword ptr
    xor eax,edx
    bswap eax
    shr edx,4
    mul edx
    mov ebx,dword ptr
    bswap ebx
    xor eax,ebx
    mov dword ptr ,eax
    mov ebx,dword ptr
    xor eax,ebx
    xor edx,edx
    mov ebx,984379
    div ebx
    mov ebx,dword ptr
    add ebx,edx
    xor eax,ebx
    bswap eax
    sub eax,ecx
    mov ebx,dword ptr
    xor eax,ebx
    mov dword  ptr ,eax
    mov ebx,dword ptr
    push ebx
    xor eax,ebx
    xor ebx,ebx
    mov bx,word ptr
    xor edx,edx
    div ebx     <-- exception occures here
Posted on 2006-07-31 03:32:28 by mr.schyte