COMPARE macro item1,item2

mov edx,[ebx + item1*4]
cmp edx,[ebx + item2*4]
ENDM


EXCHANGE macro item1,item2
mov edx,[ebx + item1*4] ; edx is the only free reg
push DWORD PTR [ebx + item2*4]
mov [ebx + item2*4],edx
pop DWORD PTR [ebx + item1*4]
ENDM



QuickSort PROC uses esi edi ebx, qARRAY:DWORD, qLOW:DWORD, qHIGH:DWORD
mov ebx, qARRAY
mov esi, qLOW
mov edi, qHIGH
call PARTITION
ret ; let MASM do it's "magic" on this ret

; Partition the array defined by index range [esi,edi] into
; two ranges: one with items lessthanequal item (ecx) and
; one with items greaterthanequal item (ecx). Recursively process
; sub-partitions. If there are zero/one items greater then it is
; possible to skip the upper partition. Don't move items
; equal to pivot item.
;
; OnEntry:
; ebx = Array pointer (doesn't change)
; esi = Lower index [0,edi-1]
; edi = Upper index [1,+]
PARTITION:
mov eax,edi
push esi
push edi
sub eax,esi ; width of partition - 1
jle @exit ; must be >1 elements to sort

; bad choice for sorted data
mov ecx,esi ; choose pivot
;;;; inc esi ; avoid moving pivot

inc edi ; counter first dec

@low: inc esi
dec eax
js @done
COMPARE ecx,esi ; index1, index2
jge @low
; esi item needs to be moved into high partition
@high: dec edi
dec eax
js @done
COMPARE ecx,edi ; index1, index2
jle @high
EXCHANGE esi,edi
jmp @low
; esi=edi ; (esi) item part of high partition
@done: dec esi
EXCHANGE ecx,esi
cmp edi,[esp]
jge @half
dec esi ; pivot is sorted
mov eax,[esp]
mov [esp],esi
xchg edi,eax
xchg esi,eax
call PARTITION
pop edi
pop esi
jmp PARTITION
;fix highhalf size of one skip
@half: pop edi ; pivot was greatest item
pop esi
dec edi ; reduce partition size by one
jmp PARTITION
@exit: add esp,8
db 0C3h ; ret ; force _REAL_ ret in MASM

QuickSort ENDP
This should be pretty fast - I haven't had time to time it. :) I want to make a few changes, but I'm thinking it over some more. Hutch, shouldn't have any problem digesting this one. Warning: there is much going on here, so be careful making changes.
Posted on 2001-12-15 05:45:56 by bitRAKE
Okay, revised again. :) This version shows how to sort strings using this algo. I've added some comments, but they wouldn't be of much help unless you know what quick sort is - mainly they state why I choose the pivot item I have, and how to change it. The registers have been changed to favor windows code within the COMPARE/EXCHANGE macros (ie only ECX needs to be saved - windows saves the rest).
;COMPARE macro item1,item2

; mov edx,[ebx + item1*4]
; cmp edx,[ebx + item2*4]
;ENDM

COMPARE macro item1,item2
push ecx
invoke lstrcmp,DWORD PTR [ebx + item1*4],DWORD PTR [ebx + item2*4]
cmp eax,0
pop ecx
ENDM


EXCHANGE macro item1,item2
mov edx,[ebx + item1*4]
mov eax,[ebx + item2*4]
mov [ebx + item2*4],edx
mov [ebx + item1*4],eax
ENDM



QuickSort PROC uses esi edi ebx, qARRAY:DWORD, qLOW:DWORD, qHIGH:DWORD
push ebp ; an extra register, oh boy!
mov ebx, qARRAY
mov esi, qLOW
mov edi, qHIGH
call PARTITION
pop ebp
ret ; let MASM do it's "magic" on this ret

; Partition the array defined by index range [esi,edi] into
; two ranges: one with items lessthanequal item (ecx) and
; one with items greaterthanequal item (ecx). Recursively process
; sub-partitions. If there are zero/one items greater then it is
; possible to skip the upper partition. Don't move items
; equal to pivot item.
;
; OnEntry:
; ebx = Array pointer (doesn't change)
; esi = Lower index [0,edi-1]
; edi = Upper index [1,+]
; During Main Section of Code:
; ebx = (see above)
; esi = top index of lower partition
; ebp = bottom index of upper partition
; edi = (ebp-esi) gap between partition, approaches zero
; ecx = pivot index
; eax/edx = (disposable)
PARTITION:
push esi
push edi
mov ebp,edi
sub edi,esi ; width of partition - 2
jle @exit ; must be >1 elements to sort

; bad choice for sorted data
mov ecx,esi ; choose pivot
; better choice?
; lea ecx,[esi+ebp]
; shr ecx,1
;Note: You can choose any pivot you want - algorithm doesn't move
; items that equal the pivot - including the pivot, which is
; required! A general solution would choose the mid-point item
; for the case of partially sorted data. I have chosen the first
; item to reduce the size of the partition at the start. Uncomment
; the following line if you choose another pivot method - it is not
; needed because pivot item lies between partitions by definition.
; dec esi ; counter first inc
inc ebp ; counter first dec

@low: inc esi
dec edi
js @donel ; esi is out of range or part of upper partition
COMPARE ecx,esi ; array, index1, index2
jge @low

; esi item needs to be moved into high partition
@high: dec ebp
dec edi
js @doneh ; ebp is out of range or part of lower partition
COMPARE ecx,ebp ; array, index1, index2
jle @high

EXCHANGE esi,ebp
jmp @low


; esi=ebp ; (esi) item part of upper partition
@doneh: inc ebp
@donel: dec esi
EXCHANGE ecx,esi
dec esi ; pivot is sorted
mov edi,[esp]
mov [esp],esi
mov esi,ebp
call PARTITION
pop edi
pop esi
jmp PARTITION

@exit: add esp,8
retn ;db 0C3h; force _REAL_ ret in MASM PROC
QuickSort ENDP

;Initial State: (general)
;
; esi ecx edi
; |---n---|-|---n---|
; P
;
;My choice of Pivot:
;
; esi edi
; |-|-------n-------|
; P
;
;
;Possible Results: (P = pivot index)
;
; esi edi
; |-|-------n-------| all items are >= pivot
; P inc esi, partition new range
;
; esi edi
; |-|-|-----n-------| one item =< pivot, rest >= pivot
; P inc esi, inc esi, partition new range
;
; esi--n--edi esi--n--edi
; |---n---|-|---n---| some items =< pivot, some items >= pivot
; P partition range #1, partition range #1
;
; esi edi
; |------n------|-|-| one item >= pivot, rest <= pivot
; P dec edi, partition new range
;
; esi edi
; |------n--------|-| all items are <= pivot
; P dec edi, dec edi, partition new range
;
Posted on 2001-12-17 23:09:33 by bitRAKE
Write a nonrecursive version bitrake, and do comparative timings.
I'm too lazy to do that sorta stuff myself, but I'd like to know how
much faster you can get a nonrecursive version :).
Posted on 2001-12-17 23:17:48 by f0dder
Hutch--, has a non-recursive version in MASM32 7.0. I like recursion - it makes for small code, and I think the above is faster than Hutch--'s. Realistically, testing them is real work - I'm sure they have different performance characteristics - like the string searches do. Hutch--'s is made just to sort DWORDs, mine is made to sort anything you can write the two macros for - you don't even need an array of pointers if you don't want - you could index the structures directly. It's general, and still pretty fast. I'm happy with it. :)

Edit: Okay, I'll try a non-recursive version, maybe. ;) There isn't much to some of those theories at the machine level. I'd be better to optimise the sorting of the small partitions.

Edit: {clip} :tongue: Need to think before hitting the keys.
Posted on 2001-12-17 23:39:23 by bitRAKE
Hi bitRAKE,

i have studied a bit the algo and found out that the strcmp function isn't case sensitive and btw a bit slow, i suggest to use a self made compare function. This is how it looks like then:
QuickSort proto :DWORD,:DWORD,:DWORD

EisoCmp proto :DWORD,:DWORD

QuickSort PROC uses esi edi ebx, qARRAY:DWORD, qLOW:DWORD, qHIGH:DWORD
push ebp ; an extra register, oh boy!
mov ebx, qARRAY
mov esi, qLOW
mov edi, qHIGH
call PARTITION
pop ebp
ret ; let MASM do it's "magic" on this ret

PARTITION:
push esi
push edi
mov ebp,edi
sub edi,esi ; width of partition - 2
jle @exit ; must be >1 elements to sort
mov ecx,esi ; choose pivot
inc ebp ; counter first dec
@low: inc esi
dec edi
js @donel ; esi is out of range or part of upper partition

invoke EisoCmp,DWORD PTR [ebx + ecx*4],DWORD PTR [ebx + esi*4]
cmp eax,0

jge @low
@high: dec ebp
dec edi
js @doneh ; ebp is out of range or part of lower partition

invoke EisoCmp,DWORD PTR [ebx + ecx*4],DWORD PTR [ebx + ebp*4]
cmp eax,0

jle @high
mov edx,[ebx + esi*4]
mov eax,[ebx + ebp*4]
mov [ebx + ebp*4],edx
mov [ebx + esi*4],eax
jmp @low
@doneh: inc ebp
@donel: dec esi
mov edx,[ebx + ecx*4]
mov eax,[ebx + esi*4]
mov [ebx + esi*4],edx
mov [ebx + ecx*4],eax


dec esi ; pivot is sorted
mov edi,[esp]
mov [esp],esi
mov esi,ebp
call PARTITION
pop edi
pop esi
jmp PARTITION

@exit: add esp,8
db 0C3h ; ret ; force _REAL_ ret in MASM

QuickSort ENDP


EisoCmp PROC a:DWORD,b:DWORD
push esi
push edi
xor eax,eax
mov esi,a
mov edi,b
next: mov al,[esi]
mov ah,[edi]
cmp ax,0
jz same
cmp al,ah
jb lower
ja higher
inc esi
inc edi
jmp next
same: xor eax,eax
pop edi
pop esi
ret
lower: or eax,-1
pop edi
pop esi
ret
higher: mov eax,1
pop edi
pop esi
ret
EisoCmp ENDP
It's performing faster and better now(smaller compare plus no push and po for ecx reg.), but still there is a bug somewhere and i can't find it out, because the stack is sorted now for 99%.

:alright:
Posted on 2002-06-03 09:46:59 by eisodur
Hello eisodur, see how easy it is to create quicksorts for different types of data? I think the error lies in not testing each string for the end. It should work okay if both strings are the same length, otherwise it overruns them.

I am wrong in my statement above - I don't know why it wouldn't work, but will test it later this evening.
Posted on 2002-06-03 10:03:50 by bitRAKE
Other sort routine (universal)



global SortTable:proc

; Free reg {eax,esi}
struc sSort
CompL dd ? ;>ecx=Index,edi=Item,<al=?(i(Item(Index))<(Item))
CompG dd ? ;>edx=Index,edi=Item,<al=?(i(Item(Index))>(Item))
Swap dd ? ;>ecx,edx=Indexs
Create dd ? ;>edi=Index; <edi=Item
Destroy dd ? ;>edi=Item
ends

; ebx=sSort
; ecx=Left
; edx=Right
proc SortTable
push ecx ; ecx=cL; [esp]=Left ??????????
mov ebp,edx ; edx=cR; ebp =Right Left ? Right
lea edi,[ecx+edx]
shr edi,1 ; edi=(cL+cR)/2
call [ebx+sSort.Create] ; cM=Create(edi)
dec ecx
@@lpL: inc ecx ; cL++
@@lpV: call [ebx+sSort.CompL] ; ? CompL(I(cL),cM)
test al,al
jne @@lpL
inc edx
@@lpR: dec edx ; cR--
call [ebx+sSort.CompG] ; ? CompG(I(cR),cM)
test al,al
jne @@lpR
cmp ecx,edx ; cL,cR
je @@skS_
jg @@skS
call [ebx+sSort.Swap] ; Swap(cL,cR)
@@skS_: inc ecx ; cL++
dec edx ; cR--
@@skS: cmp ecx,edx ; ? (cL<=cR)
jle @@lpV
call [ebx+sSort.Destroy] ; Destroy(cM)
pop eax ; eax=Left
cmp edx,eax ; ? (cR<Left)
jl @@skS0
push ecx ebp ; cL Right
mov ecx,eax ; ecx=Left
call SortTable ; SortTable(Left,cR)
pop ebp ecx ; Right cL
@@skS0: cmp ecx,ebp ; ? (cL>=Right)
jge @@ret
mov edx,ebp ; edx=Right
jmp SortTable ; SortTable(cL,Right)
@@ret: ret
endp
Posted on 2002-06-03 10:05:41 by Nexo
Nexo, why are there CompL, CompG?
COMPARE macro item1,item2

mov eax,item1
mov edx,item2
call [ebx+sSort.Comp]
ENDM

EXCHANGE macro item1,item2
mov eax,item1
mov edx,item2
call [ebx+sSort.Swap]
ENDM
Now each array is an object with methods and the QuickSort code isn't duplicated. :)


eisodur, how about this?
COMPARE macro item1,item2

LOCAL next,done

push ecx
mov edx,[ebx + item1*4]
mov ecx,[ebx + item2*4]
next:
mov al,[edx]
inc edx
cmp al,[ecx]
jne done
inc ecx
cmp al,0
jne next
done:
pop ecx
ENDM
Much less overhead than your PROC. :)
Posted on 2002-06-03 22:48:58 by bitRAKE
Hello there,

it's a fast one but i think something is missing...


(You do the compare but return no results so it's just everytime eax>0. Even when the strings are same)
Posted on 2002-06-04 03:32:35 by eisodur
eisodur, I don't use AL for the return value - the flags are set with the correct relationship < = >. :) The only thing I see wrong is that it is a signed byte comparision and it should be unsigned, imho.

Unsigned byte string compare:
COMPARE macro item1,item2

LOCAL next,done

push edi
push ecx
xor eax,eax ; remove if movzx is used
xor edx,edx
mov edi,[ebx + item1*4]
; item1/item2 could be ECX, so store last into ECX
mov ecx,[ebx + item2*4]
next:
mov al,[edi] ; movzx eax, BYTE PTR [edi] is better
inc edi
mov dl,[ecx] ; ; movzx edx, BYTE PTR [ecx] is better
inc ecx
cmp eax,edx
jne done
cmp eax,0
jne next
done:
pop ecx
pop edi
ENDM
I'd be better use to change the Jcc's in the algo for unsigned branching. :)
Posted on 2002-06-04 06:00:25 by bitRAKE
I give here a result of my stack before and after the call to QuickSort, hope that it says enough to someone to see what's going wrong.

The unsorted stack just before the call:

00ECFF68 00ECFF74
00ECFF6C 00000000
00ECFF70 0000000D
00ECFF74 00ED0056 ASCII "ALLES"
00ECFF78 00ED0050 ASCII "ALLEs"
00ECFF7C 00ED004C ASCII "als"
00ECFF80 00ED0048 ASCII "BBB"
00ECFF84 00ED0044 ASCII "AAA"
00ECFF88 00ED0040 ASCII "ALL"
00ECFF8C 00ED003C ASCII "ALL"
00ECFF90 00ED0038 ASCII "all"
00ECFF94 00ED0034 ASCII "all"
00ECFF98 00ED0030 ASCII "ALL"
00ECFF9C 00ED002C ASCII "AAB"
00ECFFA0 00ED0028 ASCII "CCC"
00ECFFA4 00ED0024 ASCII "BBB"
00ECFFA8 00ED0020 ASCII "AAA"

Now the sorted stack just after QuickSort is done:

00ECFF74 00ED0020 ASCII "AAA"
00ECFF78 00ED0044 ASCII "AAA"
00ECFF7C 00ED002C ASCII "AAB"
00ECFF80 00ED0030 ASCII "ALL"
00ECFF84 00ED0040 ASCII "ALL"
00ECFF88 00ED003C ASCII "ALL"
00ECFF8C 00ED0056 ASCII "ALLES"
00ECFF90 00ED0038 ASCII "all"
00ECFF94 00ED0050 ASCII "ALLEs"
00ECFF98 00ED0048 ASCII "BBB"
00ECFF9C 00ED0024 ASCII "BBB"
00ECFFA0 00ED0028 ASCII "CCC"
00ECFFA4 00ED0034 ASCII "all"
00ECFFA8 00ED004C ASCII "als"

-is the jle @exit not timed well?
-or is the pivot not replaced well?

I don't know, but i'm beginning to hallucinate here and nead a brake after debugging for 2 days.:confused:
Posted on 2002-06-04 08:10:15 by eisodur
I HAVE IT AND I AM REALLLLLY GLAD!!:grin: :grin:

The problem was that there is one to many inc ebp.

When this happens there is skipped one element in the partioning.

Here follows the QuickSort that works with me:
QuickSort PROC uses esi edi ebx, qARRAY:DWORD, qLOW:DWORD, qHIGH:DWORD

push ebp ; an extra register, oh boy!
mov ebx, qARRAY
mov esi, qLOW
mov edi, qHIGH
call PARTITION
pop ebp
ret ; let MASM do it's "magic" on this ret

PARTITION:
push esi
push edi
mov ebp,edi
sub edi,esi ; width of partition - 2
jle @exit ; must be >1 elements to sort
mov ecx,esi ; choose pivot
inc ebp ; counter first dec
@low: inc esi
dec edi
js @donel ; esi is out of range or part of upper partition

invoke EisoCmp,DWORD PTR [ebx + ecx*4],DWORD PTR [ebx + esi*4]
cmp eax,0

jge @low
@high: dec ebp
dec edi
js @donel ; ebp is out of range or part of lower partition

invoke EisoCmp,DWORD PTR [ebx + ecx*4],DWORD PTR [ebx + ebp*4]
cmp eax,0

jle @high
mov edx,[ebx + esi*4]
mov eax,[ebx + ebp*4]
mov [ebx + ebp*4],edx
mov [ebx + esi*4],eax
jmp @low

@donel: dec esi
mov edx,[ebx + ecx*4]
mov eax,[ebx + esi*4]
mov [ebx + esi*4],edx
mov [ebx + ecx*4],eax


dec esi ; pivot is sorted
mov edi,[esp]
mov [esp],esi
mov esi,ebp
call PARTITION
pop edi
pop esi
jmp PARTITION

@exit: add esp,8
db 0C3h ; ret ; force _REAL_ ret in MASM

QuickSort ENDP


EisoCmp PROC a:DWORD,b:DWORD
push esi
push edi
xor eax,eax
mov esi,a
mov edi,b
next: mov al,[esi]
mov ah,[edi]
cmp ax,0
jz same
cmp al,ah
jb lower
ja higher
inc esi
inc edi
jmp next
same: xor eax,eax
pop edi
pop esi
ret
lower: or eax,-1
pop edi
pop esi
ret
higher: mov eax,1
pop edi
pop esi
ret
EisoCmp ENDP
This code is fine you can optimise it with bitRAKE's strcmp macro, but do not try it with the ole32 strcmp.

Thanks goes 2 bitRake for this fine QuickSort routine.

:alright:
Posted on 2002-06-04 09:03:09 by eisodur
Keep up the good work folks, sort routines are very useful, particularly when the can do a multiple of different data types. I dabbled with sort routines last year but ran out of time to play with them, when I get some more time and a bit more working brain, I would like to have a go at them again as there is some interesting stuff that can be done.

Hybrid sorts seem to have the legs at the moment, I had a lot of useful input from JIBZ who was working on them as well, Classical quick sorts combined with alternative sort types when the quick sort count went too low helped them to get faster.

I think the C++ STL INTRO sort is the current fast one but there is room to go faster if its done right.

Regards,

hutch@movsd.com
Posted on 2002-06-04 09:42:59 by hutch--

I HAVE IT AND I AM REALLLLLY GLAD!!:grin: :grin:

The problem was that there is one to many inc ebp.

When this happens there is skipped one element in the partioning.
That is great! Sounds like there was bug in the routine.
Thanks for the help. :)
Posted on 2002-06-04 10:23:54 by bitRAKE
Stack usage

A common implementation is to always subpartition in the same order. In this case, the worst case stack usage is proportional to the number of items to sort (N). Stack usage can be reduced to log2(N) by choosing to subpartition the smaller partition before the larger one.
Posted on 2002-06-04 16:35:03 by tenkey
tenkey, thanks - I will implement in next revision.
Posted on 2002-06-04 17:22:34 by bitRAKE
I just had a poke tyhrough the stuff I was working on last year and found a working algo that may be useful in writing a hybrid sort algo where the main work is done by a quick sort and the auxilary sort is done by an insertion sort.

I have seen the insertion sort used in a couple of ways, one is to check the gap size that the quick sort is working on and under a threshold, pass it off to the insertion sort as it is a lot more efficient on small gaps than the quick sort.

Hope it is useful, Jibz coded the original in C.

Regards,

hutch@movsd.com



; ########################################################################

insertsort2 proc Arr:DWORD, lb:DWORD, ub:DWORD

; parameters
; -- -------------------------------------------------
; Arr base address of array
; lb index of lower member of array to sort
; ub index of upper member of array to sort
; ---------------------------------------------------

push ebx
push esi
push edi

mov esi, Arr ; offset of array in ESI
mov eax, lb ; lower bound
shl eax, 2 ; mul by 4
add esi, eax ; add offset to array
mov ecx, 1
inc ub
mov ebx, ub ; upper bound in EBX
sub ebx, lb ; sub lower bound from upper

iStart:
mov edi, [esi+ecx*4]
mov edx, ecx

inner:
; -----------------
; unroll loop by 2
; -----------------
mov eax, [esi+edx*4-4]
cmp eax, edi
jle wExit
mov [esi+edx*4], eax
dec edx
jz wExit

mov eax, [esi+edx*4-4]
cmp eax, edi
jle wExit
mov [esi+edx*4], eax
dec edx
jnz inner

wExit:
mov [esi+edx*4], edi
inc ecx
cmp ecx, ebx
jl iStart

pop edi
pop esi
pop ebx

ret

insertsort2 endp

; ##########################################################################
Posted on 2002-06-04 21:14:03 by hutch--
Thanks, Hutch--. There is much here to think about. Don't we usually search a large body of information, fitting it to a model, and then sort? I mean in a very general sense of software mechanics. A person could keep themselves busy for the rest of their life with just sort and search. :grin:
Posted on 2002-06-04 22:51:31 by bitRAKE

Hope it is useful, Jibz coded the original in C.


Hi.
If you still have the C version, I would be glad to see it.

Thanks.
Posted on 2002-06-05 00:58:13 by JCP

Nexo, why are there CompL, CompG?
COMPARE macro item1,item2

mov eax,item1
mov edx,item2
call [ebx+sSort.Comp]
ENDM

EXCHANGE macro item1,item2
mov eax,item1
mov edx,item2
call [ebx+sSort.Swap]
ENDM
Now each array is an object with methods and the QuickSort code isn't duplicated. :)

It is not always good. For example:


Create
mov edi,[Array+4*edi]

Swap
mov eax,[Array+4*ecx]
mov esi,[Array+4*edx]
mov [Array+4*edx],eax
mov [Array+4*ecx],esi

CompL
cmp [Array+4*ecx],edi

CompG
cmp [Array+4*edx],edi
Posted on 2002-06-05 12:26:47 by Nexo