I spent ages trying to get a bug out of my MMX effect, until I realised that it was caused by bad data entering the effect.
So here is the code I used to generate the two 64-bit MMX variables:
__m64 lowkey;
__m64 highkey;

short redlow = (short) m_fRedLow;
short redhigh = (short) m_fRedHigh;
short greenlow = (short) m_fGreenLow;
short greenhigh = (short) m_fGreenHigh;
short bluelow = (short) m_fBlueLow;
short bluehigh = (short)m_fBlueHigh;
lowkey = _mm_set_pi16(-1,redlow,greenlow,bluelow);
highkey = _mm_set_pi16(256,redhigh,greenhigh,bluehigh);

HOWEVER, it just wouldn't work as it should. Strangely, if I inserted code between assigning the shorts and calling the intrinsics (e.g. some debug code to test that those shorts are what they should be, or even unrelated code), it would 'magically' work.
This perplexed me so much that I compiled to assembly to see what was happening. It seems that with the code as above, VC optimised it so that it does not store the values in shorts at all, but converts from floating point on the fly, as if I had written:

lowkey = _mm_set_pi16(-1,(short) m_fRedLow,(short)m_fGreenLow,(short) m_fBlueLow);

and apparently the intrinsic is buggy when used like this.
However, if I put code in between, it first converts the floating-point values to shorts and stores them on the stack, and when the intrinsic runs it just fetches the already shortened (pun) shorts and works perfectly.


Here is an assembler snippet WITHOUT something in between

; 226 : lowkey = _mm_set_pi16(-1,redlow,greenlow,bluelow);

fld DWORD PTR
imul eax, DWORD PTR
push edi
mov DWORD PTR _numpixels$, eax
call __ftol2
mov WORD PTR tv235, ax
fld DWORD PTR
call __ftol2
fld DWORD PTR
mov WORD PTR tv235, ax
call __ftol2
mov WORD PTR tv235, ax
mov WORD PTR tv235, -1
movq mm0, MMWORD PTR tv235
movq MMWORD PTR _lowkey$, mm0

; 227 : highkey = _mm_set_pi16(256,redhigh,greenhigh,bluehigh);

fld DWORD PTR
call __ftol2
fld DWORD PTR
mov WORD PTR tv219, ax
call __ftol2
fld DWORD PTR
mov WORD PTR tv219, ax
call __ftol2

Actually there is more here that I forgot to paste in; however, it is the same as the code for line 226 (which runs on into 227), so you can just see
how 226 is buggy.

And here is the snippet when I put something in between (this is the version that works as it should),
where this is the C++ code that was put in between:
DWORD* pDest ;
DWORD* pSource1, *pSource2;
pSource1 = (DWORD*)ppInput[0]->GetBuffer();
pSource2 = (DWORD*)ppInput[1]->GetBuffer();

and here is the assembler

; 186 : short redlow = (short) m_fRedLow;

fld DWORD PTR
imul eax, DWORD PTR
push edi
mov DWORD PTR _numpixels$, eax
call __ftol2

; 187 : short redhigh = (short) m_fRedHigh;

fld DWORD PTR
mov edi, eax
call __ftol2

; 188 : short greenlow = (short) m_fGreenLow;

fld DWORD PTR
mov DWORD PTR _redhigh$, eax
call __ftol2

; 189 : short greenhigh = (short) m_fGreenHigh;

fld DWORD PTR
mov DWORD PTR _greenlow$, eax
call __ftol2

; 190 : short bluelow = (short) m_fBlueLow;

fld DWORD PTR
mov DWORD PTR _greenhigh$, eax
call __ftol2

; 191 : short bluehigh = (short)m_fBlueHigh;

fld DWORD PTR
mov DWORD PTR _bluelow$, eax
call __ftol2
; 223 : DWORD* pDest ;
; 224 : DWORD* pSource1, *pSource2;
; 225 : pSource1 = (DWORD*)ppInput[0]->GetBuffer();

mov ecx, DWORD PTR _ppInput$
mov ecx, DWORD PTR
mov edx, DWORD PTR
mov esi, eax
call DWORD PTR
mov DWORD PTR _pSource1$, eax

; 226 : pSource2 = (DWORD*)ppInput[1]->GetBuffer();

mov eax, DWORD PTR _ppInput$
mov ecx, DWORD PTR
mov edx, DWORD PTR
call DWORD PTR

; 231 : lowkey = _mm_set_pi16(-1,redlow,greenlow,bluelow);

mov cx, WORD PTR _greenlow$

; 232 : highkey = _mm_set_pi16(256,redhigh,greenhigh,bluehigh);

mov dx, WORD PTR _greenhigh$
mov DWORD PTR _pSource2$, eax
movzx eax, WORD PTR _bluelow$
mov WORD PTR tv184, ax
movzx eax, WORD PTR _redhigh$
mov WORD PTR tv184, cx

; 233 :
; 234 :
; 235 :
; 236 : pDest = (DWORD*)pOutput->GetBuffer();

mov ecx, DWORD PTR _pOutput$
mov WORD PTR tv184, di
mov WORD PTR tv184, -1
movq mm0, MMWORD PTR tv184
mov WORD PTR tv179, dx
mov edx, DWORD PTR
mov WORD PTR tv179, ax
movq MMWORD PTR _lowkey$, mm0
mov WORD PTR tv179, si
mov WORD PTR tv179, 256 ; 00000100H
movq mm0, MMWORD PTR tv179
movq MMWORD PTR _highkey$, mm0

..
You can definitely see the difference.
I don't really know the asm floating-point instructions, so maybe somebody who does can work out what VC7 is doing wrong in the first example.
And maybe somebody can tell me how I submit a bug to Microsoft?

Cheers,

karl
Posted on 2004-07-01 20:37:24 by klumsy
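Somebody on #coders narrowed down the problem for me.

Silly MSVC++ optimises its intrinsic by mixing in floating-point code, so MMX and floating-point code end up in the same place before EMMS. The MMX registers are aliased onto the x87 register stack, so once the first movq has executed (the end of line 226 in the broken listing), the following fld/__ftol2 conversions for line 227 run on an FPU that is still in MMX state and produce garbage.
Not a nice situation.

Does anybody know how I can report this to Microsoft?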
Posted on 2004-07-01 21:56:30 by klumsy
Posted on 2004-07-02 19:59:24 by comrade