Ok, I'm having a few problems with my inline asm next permutation algorithm. Any help would be appreciated.
Basically this algo seems to have problems when finding the next permutation of the data:
{1,5,4,3,2}
Here is the C version which works fine:
This algo returns:
{2,1,3,4,5}
Which is correct.
The asm algo produces some strange results:
This algo returns:
{2,1,4,0,5}
Strangely enough, if i add the inline statement before the declaration of this algo like so "void inline NextPerm(BYTE* a, BYTE n);"
it returns:
{2,1,4,2,5}
Ok, I know this code is ugly, but it's far from being optimized it's just a prototype. It works fine for smaller data inputs.
Now for debugging purposes you'll notice the printf statements at the end of each algo.
They return the same values for the asm and the c algos.
asm c
j 0 0
k 4 4
r 2 2
s 3 3
So my question is why does the asm algo return different values then the c one? And also why does the asm function return different values if i add the inline statement?
Thanks guys,
JP
Basically this algo seems to have problems when finding the next permutation of the data:
{1,5,4,3,2}
Here is the C version which works fine:
void inline NextPerm1(BYTE* a, BYTE n)
{
register int j = n - 1;
while (a[j] > a [j+1])
{
j--;
}
register int k = n;
while (a[j] > a[k])
{
k--;
}
Swap(&a[j],&a[k]);
register int r = n;
register int s = j + 1;
while (r > s)
{
Swap(&a[r],&a[s]);
r--;
s++;
}
printf("\nj: %d, k: %d, r: %d, s: %d\n", j,k,r,s);
}
This algo returns:
{2,1,3,4,5}
Which is correct.
The asm algo produces some strange results:
void NextPerm(BYTE* a, BYTE n)
{
unsigned long j, k, r, s;
j = 0;
k = 0;
r = 0;
s = 0;
_asm
{
pushad
xor eax, eax
xor ebx, ebx
xor ecx, ecx
xor edx, edx
xor esi, esi
xor edi, edi
//j = ecx
//k = ebx
movzx edi, n
dec edi
mov ecx, edi //j = n - 1
while1:
mov esi, a
add esi, ecx
movzx eax, [esi]
movzx ebx, [esi + 1]
cmp eax, ebx
jle wend1 // while (a[j] > a[j + 1])
dec ecx // j--
jmp while1
wend1:
mov j, ecx
xor eax, eax
xor ebx, ebx
xor ecx, ecx
xor edx, edx
xor esi, esi
xor edi, edi
movzx ebx, n //k = n
mov k, ebx
mov esi, a
mov ecx, j
add esi, ecx //a[j]
movzx eax, [esi] //eax = a[j]
while2:
mov edx, a
add edx, ebx
movzx ecx, [edx]
cmp eax, ecx
jle wend2 //while (a[j] > a[k])
dec ebx //k--
jmp while2
wend2:
mov k, ebx
pushad
push esi
push edx
call Swap //swap(&a[j], &a[k])
add esp, 8
popad
xor eax, eax
xor ebx, ebx
xor ecx, ecx
xor edx, edx
xor esi, esi
xor edi, edi
mov esi, a
mov edx, a
movzx ebx, n //r = n
mov r, ebx
mov ecx, j
inc ecx
mov s, ecx //s = j + 1
while3:
cmp ebx, ecx
jle wend3 //while ( r > s)
add esi, ebx //a + r
add edx, ecx //a + s
pushad
push esi
push edx
call Swap //swap(&a[r], &a[s])
add esp, 8
popad
dec ebx //r--
inc ecx //s++
jmp while3
wend3:
mov r, ebx
mov s, ecx
popad
}
printf("\nj: %d, k: %d, r: %d, s: %d\n", j,k,r,s);
}
This algo returns:
{2,1,4,0,5}
Strangely enough, if i add the inline statement before the declaration of this algo like so "void inline NextPerm(BYTE* a, BYTE n);"
it returns:
{2,1,4,2,5}
Ok, I know this code is ugly, but it's far from being optimized it's just a prototype. It works fine for smaller data inputs.
Now for debugging purposes you'll notice the printf statements at the end of each algo.
They return the same values for the asm and the c algos.
asm c
j 0 0
k 4 4
r 2 2
s 3 3
So my question is why does the asm algo return different values then the c one? And also why does the asm function return different values if i add the inline statement?
Thanks guys,
JP
Oh, and if you want to fiddle around with it here is the whole c file.
Input n = 5.
And I'm using vc6.
Input n = 5.
And I'm using vc6.
Look at this code in the beginning:
Since ecx at the beginning is 4, the "movzx ebx, " references an item outside your array (the 6th item in a 5 element array). This may be an unpredicted value, which would explain the other random results you get.
mov esi, a
add esi, ecx
movzx eax, [esi]
movzx ebx, [esi + 1]
Since ecx at the beginning is 4, the "movzx ebx, " references an item outside your array (the 6th item in a 5 element array). This may be an unpredicted value, which would explain the other random results you get.
At that point ecx is 3, not 4.
If you download main.txt you'll notice the value that gets passed is n - 1.
Thanks,
JP
If you download main.txt you'll notice the value that gets passed is n - 1.
Thanks,
JP
Correct. Now I got it. In the last loop:
You never reassign a to esi, so esi increases outside of your array.
add esi, ebx
add edx, ecx
You never reassign a to esi, so esi increases outside of your array.
BINGO!!
Thanks, I don't know why I didn't see that. I assigned it in all the other loops. Anyways I could kiss you right now!! Thanks! Now on to the fun optimization.
Thanks, I don't know why I didn't see that. I assigned it in all the other loops. Anyways I could kiss you right now!! Thanks! Now on to the fun optimization.