Hello to all, I am writing a routine that scales the R, G and B values of each pixel of an image by a factor from 0% to 200%. It worked well in C++, but not in assembly! I need the code to be highly optimized.
the task to be executed is the following:
B = (B * B_factor) / 128
if B>255 then B=255

G = (G * G_factor) / 128
if G>255 then G=255

R = (R * R_factor) / 128
if R>255 then R=255

It's very simple...
my code in assembly is: 

// Per-pixel colour scaling: channel = min(255, (channel * factor) / 128).
// Pixels are stored as three consecutive bytes in B, G, R order.
// In:    EBX = address of the current pixel, ECX = number of pixels left
//        (both are set up outside this fragment; ECX must be > 0 on entry).
// Uses:  fb / fg / fr = per-channel factors, declared outside this fragment.
// Clobb: EAX, flags (and EDX if the factors are 16- or 32-bit variables,
//        because MUL then writes its high half there).
label:
      // ---- blue ----
      xor eax,eax            // clear ALL of EAX: the width of fb is not visible
                             // here, and a 16/32-bit MUL would otherwise
                             // multiply stale upper bits of EAX
      mov al,[ebx]           // AL = B
      mul fb                 // B * B_factor (result fits in 16 bits)
      shr eax,7              // / 128
      cmp eax,0xFF
      jbe B1                 // unsigned compare: already within 0..255
      mov al,0xFF            // saturate
    B1:
      mov [ebx],al           // store B

      // ---- green ----
      xor eax,eax
      mov al,[ebx+1]         // AL = G
      mul fg                 // G * G_factor
      shr eax,7
      cmp eax,0xFF
      jbe G1
      mov al,0xFF
    G1:
      mov [ebx+1],al         // store G

      // ---- red ----
      xor eax,eax
      mov al,[ebx+2]         // AL = R
      mul fr                 // R * R_factor
      shr eax,7
      cmp eax,0xFF
      jbe R1
      mov al,0xFF
    R1:
      mov [ebx+2],al         // store R

  add ebx,3   // next pixel (3 bytes per pixel)
  dec ecx     // one pixel fewer to go
  jnz label   // loop until the counter reaches zero

Does anybody know where the problem is?
I will be awaiting help; thanks to all!
Posted on 2005-06-16 17:56:35 by silvio_at
I have a little program in FASM to do that, compiles to a raw binary using relocatable code. However the math routine does the same, maybe it's of any use to you... :)
Attachments:
Posted on 2005-06-16 19:12:12 by QvasiModo
If you need it optimized, then MMX is the way to go here :) MMX instructions process more values per cycle, and you will have fewer jumps.
Posted on 2005-06-17 19:20:41 by ti_mo_n
My first attempt at MMX. It should work correctly, the code passed all tests I did for it.

;-----------------------------------------------------------------------
; DoBrightness - multiply every byte of a pixel buffer by cBrightness/128,
;                saturating the result at 255.
;
; In:    pRGBbits    = address of the first byte of the buffer
;        dwWidth     = width in pixels
;        dwHeight    = height in pixels
;        cBrightness = scale factor; only the low 8 bits are used
;                      (0 = black, 128 = unchanged, 255 = ~199%)
; Out:   buffer modified in place; nothing is returned
; Clobb: flags, mm0-mm7 (EAX/EBX/ECX/EDX are restored by USES)
;
; The buffer is processed as dwWidth*dwHeight*3 contiguous bytes.
; NOTE(review): the *3 (24-bit pixels) is reconstructed from context, and
; row padding is not accounted for - confirm both against the callers.
; NOTE(review): the string stores assume the direction flag is clear.
;-----------------------------------------------------------------------

; Scale the 8 unsigned bytes held in mmreg by the factor in mm5.
; Expects mm4 = 0 and mm5 = factor replicated into 4 words.
; Uses mm6/mm7 as scratch.
ScaleQword MACRO mmreg
    movq      mm6,mmreg
    movq      mm7,mmreg
    punpcklbw mm6,mm4           ; low  4 bytes -> 4 words
    punpckhbw mm7,mm4           ; high 4 bytes -> 4 words
    pmullw    mm6,mm5           ; byte * factor (<= 255*255, fits in 16 bits)
    pmullw    mm7,mm5
    psrlw     mm6,7             ; /128 - must be a LOGICAL shift: products
    psrlw     mm7,7             ;  >= 8000h would be sign-extended by psraw
    packuswb  mm6,mm7           ; words (<= 508 here) -> bytes, saturating at 255
    movq      mmreg,mm6
ENDM

DoBrightness proc uses eax ebx ecx edx pRGBbits,dwWidth,dwHeight,cBrightness
    local bytesToRender:DWORD

    ;------[ verify arguments ]----------------
    xor     eax,eax
    cmp     pRGBbits,eax
    je      _ret
    cmp     dwWidth,eax
    je      _ret
    cmp     dwHeight,eax
    je      _ret
    and     cBrightness,255             ; only the low byte is meaningful

    ;------[ compute size of bitmap ]----------
    mov     eax,dwWidth
    imul    eax,dwHeight
    lea     eax,[eax+eax*2]             ; pixels * 3 bytes per pixel
    mov     bytesToRender,eax

    ;------[ quick-set to black ]--------------
    .if cBrightness==0
        push    edi                     ; EDI is not in the USES list
        mov     edi,pRGBbits
        mov     ecx,bytesToRender
        xor     eax,eax
        shr     ecx,2
        rep     stosd                   ; whole dwords first
        mov     ecx,bytesToRender
        and     ecx,3
        rep     stosb                   ; then the 0..3 leftover bytes
        pop     edi
        jmp     _ret
    .endif

    ;------[ clone cBrightness into all 4 bytes ]------
    mov     eax,cBrightness
    cmp     al,128
    je      _ret                        ; factor 128/128: nothing to do
    mov     ah,al
    mov     dx,ax
    shl     eax,16
    mov     ax,dx                       ; eax = factor in each of its 4 bytes
    mov     cBrightness,eax

    mov     ebx,pRGBbits                ; ebx = current byte

    ;------[ byte-wise until ebx is 8-byte aligned ]------
    mov     edx,cBrightness             ; dl = factor
@@:
    test    ebx,7
    jz      @F
    mov     al,[ebx]
    mul     dl                          ; ax = byte * factor
    shr     ax,7                        ; /128
    .if ax>255                          ; unsigned compare
        mov     al,255
    .endif
    mov     [ebx],al
    inc     ebx
    dec     bytesToRender
    jz      _ret                        ; buffer ended before alignment was reached
    jmp     @B
@@:

    ;------[ MMX: 32-byte blocks ]-------------
    mov     ecx,bytesToRender
    shr     ecx,5                       ; ecx = number of 32-byte blocks
    jz      done_32byte_blocks
    mov     eax,ecx
    shl     eax,5
    sub     bytesToRender,eax           ; what remains goes to the byte loop below
    pxor    mm4,mm4                     ; mm4 = 0
    movd    mm5,cBrightness             ; mm5 = factor in the 4 low bytes
    punpcklbw mm5,mm4                   ; mm5 = factor in the 4 words

do_32byte_block:
    movq    mm0,[ebx]
    movq    mm1,[ebx+8]
    movq    mm2,[ebx+16]
    movq    mm3,[ebx+24]

    ScaleQword mm0
    ScaleQword mm1
    ScaleQword mm2
    ScaleQword mm3

    movq    [ebx],mm0
    movq    [ebx+8],mm1
    movq    [ebx+16],mm2
    movq    [ebx+24],mm3

    add     ebx,32
    dec     ecx
    jnz     do_32byte_block

    emms                                ; leave MMX state so the FPU is usable again

done_32byte_blocks:

    ;------[ remaining bytes, one at a time ]------
    mov     ecx,bytesToRender
    or      ecx,ecx
    jz      _ret

    mov     edx,cBrightness             ; dl = factor
@@:
    mov     al,[ebx]
    mul     dl                          ; ax = byte * factor
    shr     ax,7                        ; /128
    .if ax>255                          ; unsigned compare
        mov     al,255
    .endif
    mov     [ebx],al
    inc     ebx
    dec     ecx
    jnz     @B

_ret: ret
DoBrightness endp
Posted on 2005-06-18 07:54:49 by Ultrano
  Thanks to all who helped me. I found the error; all the information you gave me was of great importance. MMX is obviously the fast solution, thanks Ultrano.
Thanks to everyone.
Posted on 2005-06-18 09:17:43 by silvio_at
You're welcome :)
Besides, thanks to your topic I finally got my hands on the sweet MMX instructions ;) . I'll need them soon in one of my projects - a videoclip maker.  And thus this DoBrightness proc might be useful for me too ^_^
Posted on 2005-06-18 10:19:27 by Ultrano
Ah, I had one error in that code; I fixed it above. I had mistakenly used "psraw" instead of "psrlw" (arithmetic instead of logical shift).
Tested it with real bitmaps this time, looks well enough :)
Attachments:
Posted on 2005-06-18 10:36:32 by Ultrano