I hope that I am giving you good challenges in optimization problems. This time I translated the Delphi code to assembly and it's only 15 ms faster: the Delphi code runs in 140 ms and the asm code in 125 ms on a Pentium 4 3.2 GHz with 1 GB RAM. The problem is to find the edges of a monochromatic image. In the tests the image was 3507x4960 pixels.

Can anyone suggest better code or a better algorithm? Could using a lookup table to avoid the comparisons work better?

Delphi Code:

t1 := GetTickCount;

{ Timed section: the loop runs exactly once (raise the bound to repeat the measurement). }
for i := 1 to 1 do
begin
  p := src;
  q := dst;

  { Top and bottom rows: a black source pixel is copied unconditionally. }
  r := w*(h-1);               { index of the first pixel of the last row }
  for x := 0 to w-1 do
  begin
    if p[x] = BLACK then
      q[x] := BLACK;
    if p[r+x] = BLACK then
      q[r+x] := BLACK;
  end;

  { Left and right columns: a black source pixel is copied unconditionally. }
  for y := 0 to h-1 do
  begin
    r := w*y;                 { first pixel of row y }
    if p[r] = BLACK then
      q[r] := BLACK;
    r := r+w-1;               { last pixel of row y }
    if p[r] = BLACK then
      q[r] := BLACK;
  end;

  { Interior: a black pixel is marked when at least one of its four }
  { neighbours (left, right, one row before, one row after) is not black. }
  for y := 1 to h-2 do
  begin
    p := bmp.ScanLine[y];
    q := result.ScanLine[y];
    for x := 1 to w-2 do
    begin
      if p[x] <> BLACK then
        Continue;
      if (p[x-1] <> BLACK) or (p[x+1] <> BLACK)
        or (p[x-w] <> BLACK) or (p[x+w] <> BLACK) then
        q[x] := BLACK;
    end;
  end;
end;
t2 := GetTickCount;
ShowMessage(IntToStr(t2-t1));


Asm Code:

t1 := GetTickCount;

for i := 1 to 1 do
asm
        {*
         * Edge detection over a buffer of 4-byte pixels, walked linearly.
         *
         * Register roles:
         *   esi = current source pixel        edi = current destination pixel
         *   eax = remaining rows / pixels     ebx = remaining interior pixels of the row
         *   ecx = bytesPerLine                edx = bytesPerLineNeg
         *         (presumably +/- the byte stride of one row -- confirm at the declaration)
         *
         * A BLACK source pixel on the image border is always marked RED.
         * A BLACK interior pixel is marked RED when any of its four neighbours
         * is WHITE. This matches the "neighbour is not BLACK" test of the Pascal
         * version only if every pixel is either BLACK or WHITE.
         *
         * Requires w > 2 and h > 2: the counters are decremented before they are
         * tested, so a starting count of zero would wrap around.
         *}

        { Save the registers - Delphi prerequisite }
        push    edi
        push    esi
        push    ebx

        mov     esi, src
        mov     edi, dst

        { First line }
        mov     eax, w
@first_line:
        cmp     dword ptr [esi], BLACK
        jne     @first_line_isnt_black
        mov     dword ptr [edi], RED
@first_line_isnt_black:
        add     esi, 4
        add     edi, 4
        dec     eax
        jnz     @first_line

        { All interior lines }
        mov     eax, h
        sub     eax, 2
        mov     ecx, bytesPerLine
        mov     edx, bytesPerLineNeg

@outterloop:
        mov     ebx, w
        sub     ebx, 2

        { First pixel of each line }
        cmp     dword ptr [esi], BLACK
        jne     @1
        mov     dword ptr [edi], RED
@1:
        add     esi, 4
        add     edi, 4

@innerloop:
        {*
         * if (p[x] = BLACK) and ((p[x-1] = WHITE) or (p[x+1] = WHITE)
         *   or (p[x+w] = WHITE) or (p[x-w] = WHITE)) then
         *   q[x] := RED;
         *}
        cmp     dword ptr [esi], BLACK
        jne     @isnt_edge              { was an undefined label (@nao_e_limite) }
        cmp     dword ptr [esi-4], WHITE
        je      @is_edge
        cmp     dword ptr [esi+4], WHITE
        je      @is_edge
        cmp     dword ptr [esi+ecx], WHITE
        je      @is_edge
        cmp     dword ptr [esi+edx], WHITE
        je      @is_edge
        jmp     @isnt_edge

@is_edge:
        mov     dword ptr [edi], RED

@isnt_edge:
        add     esi, 4
        add     edi, 4
        dec     ebx
        jnz     @innerloop

        { Last pixel of each line }
        cmp     dword ptr [esi], BLACK
        jne     @2
        mov     dword ptr [edi], RED
@2:
        add     esi, 4
        add     edi, 4

        dec     eax
        jnz     @outterloop

        { Last line }
        mov     eax, w
@last_line:
        cmp     dword ptr [esi], BLACK
        jne     @last_line_isnt_black   { stray space inside the label name removed }
        mov     dword ptr [edi], RED
@last_line_isnt_black:
        add     esi, 4
        add     edi, 4
        dec     eax
        jnz     @last_line

        { Restore the registers }
        pop     ebx
        pop     esi
        pop     edi
end;
t2 := GetTickCount;
ShowMessage(IntToStr(t2-t1));
Posted on 2005-01-02 13:18:52 by brunoavila
1. You shouldn't profile with GetTickCount, but with a high resolution counter.

2. You can optimize to have fewer cache misses, as your image is quite big. (Just a guess; I don't know any better, but nobody else appears to have replied.)
Posted on 2005-01-16 06:33:20 by lifewire