Well, I need a fast assembly routine to sort from 1 to 256 integers.
I decided to use the Shell-Metzner sort.
Sedgewick proposed the following routine:
shellsort(itemType a[], int l, int r)
{ int i, j, k, h; itemType v;
int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
13776, 4592, 1968, 861, 336,
112, 48, 21, 7, 3, 1 };
for ( k = 0; k < 16; k++)
for (h = incs, i = l+h; i <= r; i++)
{
v = a; j = i;
while (j > h && a > v)
{ a = a; j -= h; }
a = v;
}
}
Can somebody write a fast assembly version ?
Thanks !
JC
I decided to use the Shell-Metzner sort.
Sedgewick proposed the following routine:
shellsort(itemType a[], int l, int r)
{ int i, j, k, h; itemType v;
int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
13776, 4592, 1968, 861, 336,
112, 48, 21, 7, 3, 1 };
for ( k = 0; k < 16; k++)
for (h = incs, i = l+h; i <= r; i++)
{
v = a; j = i;
while (j > h && a > v)
{ a = a; j -= h; }
a = v;
}
}
Can somebody write a fast assembly version ?
Thanks !
JC
Ok, here is my first implementation.
It is able to sort 128 words at most.
input: esi=array of words to sort (2 bytes per element !!)
ebp=number of elements to sort-1
output: esi=sorted array
Ok, the code is ugly, but it's generated with a C program for a special purpose.
I'm pretty sure this can be improved.
JC
shellsort:
lea edi,
lea ebx,
cmp ebx, edi
ja shell0
loopashell0:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell0
loopbshell0:
cmp dx,
jae skipshell0
mov ax,
mov , ax
sub ecx, 96
cmp ecx, esi
jae loopbshell0
skipshell0:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell0
shell0:
lea ebx,
cmp ebx, edi
ja shell1
loopashell1:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell1
loopbshell1:
cmp dx,
jae skipshell1
mov ax,
mov , ax
sub ecx, 42
cmp ecx, esi
jae loopbshell1
skipshell1:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell1
shell1:
lea ebx,
cmp ebx, edi
ja shell2
loopashell2:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell2
loopbshell2:
cmp dx,
jae skipshell2
mov ax,
mov , ax
sub ecx, 14
cmp ecx, esi
jae loopbshell2
skipshell2:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell2
shell2:
lea ebx,
cmp ebx, edi
ja shell3
loopashell3:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell3
loopbshell3:
cmp dx,
jae skipshell3
mov ax,
mov , ax
sub ecx, 6
cmp ecx, esi
jae loopbshell3
skipshell3:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell3
shell3:
lea ebx,
cmp ebx, edi
ja shell4
loopashell4:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell4
loopbshell4:
cmp dx,
jae skipshell4
mov ax,
mov , ax
sub ecx, 2
cmp ecx, esi
jae loopbshell4
skipshell4:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell4
shell4:
ret
Sorry for the indentation.
It is able to sort 128 words at most.
input: esi=array of words to sort (2 bytes per element !!)
ebp=number of elements to sort-1
output: esi=sorted array
Ok, the code is ugly, but it's generated with a C program for a special purpose.
I'm pretty sure this can be improved.
JC
shellsort:
lea edi,
lea ebx,
cmp ebx, edi
ja shell0
loopashell0:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell0
loopbshell0:
cmp dx,
jae skipshell0
mov ax,
mov , ax
sub ecx, 96
cmp ecx, esi
jae loopbshell0
skipshell0:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell0
shell0:
lea ebx,
cmp ebx, edi
ja shell1
loopashell1:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell1
loopbshell1:
cmp dx,
jae skipshell1
mov ax,
mov , ax
sub ecx, 42
cmp ecx, esi
jae loopbshell1
skipshell1:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell1
shell1:
lea ebx,
cmp ebx, edi
ja shell2
loopashell2:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell2
loopbshell2:
cmp dx,
jae skipshell2
mov ax,
mov , ax
sub ecx, 14
cmp ecx, esi
jae loopbshell2
skipshell2:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell2
shell2:
lea ebx,
cmp ebx, edi
ja shell3
loopashell3:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell3
loopbshell3:
cmp dx,
jae skipshell3
mov ax,
mov , ax
sub ecx, 6
cmp ecx, esi
jae loopbshell3
skipshell3:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell3
shell3:
lea ebx,
cmp ebx, edi
ja shell4
loopashell4:
mov dx,
lea ecx,
cmp ecx, esi
jb skipshell4
loopbshell4:
cmp dx,
jae skipshell4
mov ax,
mov , ax
sub ecx, 2
cmp ecx, esi
jae loopbshell4
skipshell4:
mov , dx
add ebx, 2
cmp ebx, edi
jbe loopashell4
shell4:
ret
Sorry for the indentation.
Sorry for the indentation.
... or lack there of.
incs DWORD 1391376, 463792, 198768, 86961, 33936, 13776, 4592, 1968, 861, 336, 112, 48, 21, 7, 3, 1
shellsort PROC USES ebx esi edi, _a:DWORD, _l:DWORD, _r:DWORD
a EQU <edi>
l EQU <_l>
r EQU <_r>
ii EQU <ecx>
j EQU <edx>
k EQU <eax>
h EQU <edi>
v EQU <ebx>
mov a, _a
; for ( k = 0; k < 16; k++)
mov k, 0
; for (h = incs[k], i = l+h; i <= r; i++)
_0: mov h, [incs][k*4]
mov ii, h
add ii, l
_1: ; v = a[i]; j = i;
mov v, [a][ii*4]
mov j, ii
jmp _3
; while (j > h && a[j-h] > v) { a[j] = a[j-h]; j -= h; }
_2: push [a][j*4]
add j, h
pop [a][j*4]
sub j, h
_3: sub j, h
jle _4
cmp v, [j*4]
jg _2
_4: add j, h
; a[j] = v;
mov [a][j*4], v
inc ii
cmp ii, r
jle _1
inc k
cmp k, SIZEOF incs
jne _0
ret
shellsort ENDP
Warning: I have not tested this. ;)
Hope my translation does not confuse you.
Is this for another contest?