I have written the following code to evaluate arithmetic expressions:



.CONST
OPP_BINARY EQU 0 ; binary and (&), or (|), xor (^), not (~)
OPP_SHIFT EQU 1 ; left shift (<<) and right shift (>>)
OPP_LINEAR EQU 2 ; binary plus (+), minus (-)
OPP_GEOMETRIC EQU 3 ; multiply (*), divide (/), modulus (%)
OPP_POLYNOMIAL EQU 4 ; power symbol (^), log, root
OPP_UNARY_MINUS EQU 5 ; -[number]
OPP_xCREMENT EQU 6 ; increment (++) and decrement (--)
OPP_PARENTHESIS EQU 7 ; ( and )

.CODE
InfixToPostfix proc uses esi edi ebx ecx edx pInfix:DWORD, pPostfix:DWORD
LOCAL stackPos:DWORD

mov stackPos, 0
invoke lstrlen, pInfix
add eax, pInfix
mov ecx, eax
mov esi, pInfix
mov edi, pPostfix
.WHILE esi < ecx
movzx ebx, BYTE PTR [esi]

; If the next character is part of a number
.IF ebx >= '0' && ebx <= '9'
mov BYTE PTR [edi], bl
add edi, 1
; If the next character is not a digit (or decimal), output a space
.IF BYTE PTR [esi+1] != '.' && ( BYTE PTR [esi+1] < '0' || BYTE PTR [esi+1] > '9' )
mov BYTE PTR [edi], ' '
add edi, 1
.ENDIF

; If the next character is a decimal point
.ELSEIF ebx == '.'
; If the character after the decimal is a digit
.IF BYTE PTR [esi+1] >= '0' || BYTE PTR [esi+1] <= '9'
mov BYTE PTR [edi], '.'
add edi, 1
.ELSE
invoke lstrcpy, pPostfix, CTEXT("Unexpected decimal point")
.WHILE stackPos > 0
pop edx
pop edx
sub stackPos, 1
.ENDW
xor eax, eax
ret
.ENDIF

; Ignore white space
.ELSEIF ebx == ' ' || ebx == 09h
; Do nothing

; Right parenthesis is a special case operator
.ELSEIF ebx == ')'
; Pop the stack until the left parenthesis is found
.WHILE DWORD PTR [esp+4] != '(' && stackPos > 0
pop edx
pop edx
mov BYTE PTR [edi], dl
add edi, 1
mov BYTE PTR [edi], ' '
add edi, 1
sub stackPos, 1
.ENDW

.IF stackPos > 0
; Discard the parenthesis, do not output
pop edx
pop edx
sub stackPos, 1
.ELSE
invoke lstrcpy, pPostfix, CTEXT("Unmatched right parenthesis")
; The stack is already clear
xor eax, eax
ret
.ENDIF

; The next character is assumed to be an operator
.ELSE
.switch( ebx ) ; Get the operator priority
.case '&', '|'
mov eax, OPP_BINARY
.break
.case '+', '-'
mov eax, OPP_LINEAR
.break
.case '*', '/', '%'
mov eax, OPP_GEOMETRIC
.break
.case '^'
mov eax, OPP_POLYNOMIAL
.break
.case 28h, 29h ; '(', ')'
mov eax, OPP_PARENTHESIS
.break
.default
; Unknown operator, print error, clear the stack, and return
invoke wsprintf, pPostfix, CTEXT("Error encountered around '%c' - Unknown operator"), ebx
.WHILE stackPos > 0
pop edx
pop edx
sub stackPos, 1
.ENDW
xor eax, eax
ret
.endswitch

; Pop all operators of higher priority than the current one to output
; with the exception of the left parenthesis, which is a special case
.WHILE DWORD PTR [esp] >= eax && DWORD PTR [esp+4] != '(' && stackPos > 0
pop edx
pop edx
mov BYTE PTR [edi], dl
add edi, 1
mov BYTE PTR [edi], ' '
add edi, 1
sub stackPos, 1
.ENDW

push ebx ; Actual opperator character
push eax ; Opperator priority
add stackPos, 1
.ENDIF

add esi, 1
.ENDW

; Pop the remaining operators from
; the stack and output them
.WHILE stackPos > 0
pop edx
pop edx
.IF edx != '('
mov BYTE PTR [edi], dl
add edi, 1
mov BYTE PTR [edi], ' '
add edi, 1
.ENDIF
sub stackPos, 1
.ENDW

; NULL terminate the postfix string
mov BYTE PTR [edi-1], 0

mov eax, edi
sub eax, pPostfix
sub eax, 1

ret
InfixToPostfix endp

EvalExp proc uses esi edi ebx ecx edx pPrefix:DWORD
LOCAL postfix[MAX_PATH]:BYTE
LOCAL expEnd:DWORD

invoke InfixToPostfix, pPrefix, ADDR postfix

lea esi, [postfix]
lea ebx, [esi+eax]
mov expEnd, ebx
.WHILE esi < expEnd
movzx ebx, BYTE PTR [esi]

; If next character is the beginning of an operand
.IF ebx >= '0' && ebx <= '9'
; Get the value of the operand
xor eax, eax
@@:
imul eax, 10
sub ebx, '0'
add eax, ebx
add esi, 1
movzx ebx, BYTE PTR [esi]
cmp ebx, ' '
jnz @B

; Operands are pushed directly to the stack
push eax

.ELSEIF ebx == ' '
; Do nothing, a space is just a divider

.ELSE
; An operator has been found
pop ecx ; Source operand
pop eax ; Destination operand

; Perform the operation
.switch( ebx )
.case '+'
add eax, ecx
.break
.case '-'
sub eax, ecx
.break
.case '*'
mul ecx
.break
.case '/'
xor edx, edx
div ecx
.break
.case '%'
xor edx, edx
div ecx
mov eax, edx
.break
.case '&'
and eax, ecx
.break
.case '|'
or eax, ecx
.break
.case '^' ; X^Y (thanks to bitRAKE)
mov ebx, ecx ; Y
mov ecx, eax ; X
mov eax, 1
xor edx, edx
jmp pow_a
pow_1: imul ecx
pow_2: imul ecx, ecx
pow_a: shr ebx, 1
jnbe pow_2
jc pow_1
.break
.endswitch

; Push the result of the operation back onto the stack
push eax
.ENDIF

; Advance to the next character
add esi, 1
.ENDW

; The top of the stack contains the result
pop eax
ret
EvalExp endp


and now I'm wondering if anybody has some advice for me on how to improve on it. I didn't have but one lousy referance so I'm pretty sure my code isn't the best. In particular, I'd appreciate any advice on how to add support for floating point numers durring the actual evaluation (in the EvalExp procedure). Any ideas, thoughts, criticisms, and advice would be a great help.

Spara
Posted on 2005-01-08 02:13:41 by Sparafusile
neat. i wrote something similar, recently, but i did it in c# :)

I was wondering: do you handle postfix expressions like (-a+b) and -(c/d) and such? I had a minor problem with the - in such cases.
Posted on 2005-01-08 08:55:06 by lifewire
Yeah, the unary operators (-,+,++,--,not) are not implemented yet. That's one of the things I'm having trouble figuring out. How did you do it?

Spara
Posted on 2005-01-08 13:19:55 by Sparafusile
well, my expression evaluator translated postfix expressions into infix (RPN) expressions which got on their turn translated into commands for a virtual stackmachine, so it differs a little from yours, but iirc, i checked whether the left hand of the '-' operator (i only had that operator as operator that differed from the others) was an operator itself, parenthesis or start of the expression. but note that i'm not 100% sure about that ;)
Posted on 2005-01-10 03:46:26 by lifewire
Yeah, the only thing I could think of was to check the lexeme to the left of the minus to see if it was another operator. This causes problems if I allow increment and decrement operations though. On a side not, I rewrote the functions above so that instead of keeping the expression in ascii form, the InfixToPostfix procedure converts it to a binary representation. This makes the whole process much easier and I think a little faster.

Edit: I just checked the Dragon Book and it said that, in the case of of a unary operator that is also a binary operator (minus sign in this case), the operator is "unary if the previous token was an operator, a left parenthesis, a comma, or an assignment symbol" (using the Fortran language for an example). Just thought you'd like to know.

Spara
Posted on 2005-01-10 13:07:13 by Sparafusile
i haven't really looked at the code, but i personally think recursion with parentheses are a lot easier than RPN, and keeping seperate stacks/define precedence and stuff. how mine worked was that i split he parsing thing into 3 layers, the first layer would check for addition/subtraction, the second layer would take care of multiplication/division, and the third layer would do stuff like parentheses and unary operators. the first layer would first call the second, and second third, etc. and take that result and do the operation. if a parenthese "(" is detected by layer 3, it calls layer 1, and sorta recurses like that. and when a unary operator is reached, layer 3 will just negate the next term (which may be a nested parenthese, which would then recurse back to layer 1 until a value was returned, etc.)

just my 2 cents. i'm still struggling to think of how to make a system like this versatile enough for a interpreter engine, or something like that. darn.
Posted on 2005-01-12 17:19:01 by Drocon
If I was to use your approach, Drocon, I'd have 11 differant functions for all the differant "layers" of operator precedence. That might be a little hard to maintain so I'll stick with the RPN way for now. I'm looking into using tripples or quadrupals, but it seems like more work than is needed right now. In case you're curious, you can download a working version of my expression evaluator here. Supported operators include:

+,-,*,/,%,^(power),&,|,xor,~,<<,>>,++,--,&&,||,!,<,>,<=,>=,==,!=,(bool),(,)

Only interger operands for now.

Spara
Posted on 2005-01-12 19:32:54 by Sparafusile
never heard of the dragon book, but at least i'm happy that my solution wasn't very brainless :)

about RPN/postfix: i think it makes sense to translate infix to postfix if the evaluation will get evaluated many times and speed matters. translating infix to postfix is almost identical to evaluating infix, i think.
Posted on 2005-01-13 05:34:15 by lifewire
Adding floating point support should be trivial, what part of it are you having problems with, and what kind of problems?

Leading unary operators can be handled near the check for the parenthesis. If found, treat it like a normal operator. When it's time to evaluate it, the item to be acted on should be on the top of the operand stack.
Following operators are processed by writing down all operators on the stack until you get to one with lower precedence, and then writing down the new operator instead of laying it on the stack.

You could compare the code with this snippet from my ARM7 assembler (written in NASM), but it doesn't have floating point or preposterous operators either, because they slipped my mind :P


; (c) me. For educational purposes.
GetExpr:
mov edi,[nextexpr]
add edi,byte 18
push byte -1
lodsb
cmp al,'#'
jz expr0
dec esi
expr0:
call fchspaces ; Skip blanks
test al,al
jz near synerror
cmp al,'('
jnz ikkeparentes
push byte -2
jmp short expr0
ikkeparentes:
call GetNumber
jae exp_nbr
mov al,129
stosb
call checklabel
xchg ecx,eax
stosw
jmp short nesteop
exp_nbr:
mov [edi],byte 128
inc edi
stosd
nesteop:
call fchspaces
lodsb
test al,al
jz near expslutt
push byte 11
pop ecx
xor edx,edx
oploop:
mov ah,[ecx+operators-1]
add ah,ah
adc edx,byte 0
shr ah,1
cmp al,ah
loopnz oploop
jnz operror
cmp cl,4
jb ikkeskiftop
cmp cl,6
jae ikkeskiftop
cmp al,[esi] ; Check << and >>
jnz operror
inc esi
ikkeskiftop:
pop eax
cmp eax,edx ; Check precedence
ja dyttnyop
pop eax ; Write down previous operator.
stosb
jmp short ikkeskiftop
dyttnyop:
jecxz sluttpar
push eax
push ecx ; Put on stack
push edx
jmp short expr0
sluttpar:
cmp al,255
jnz nesteop
push eax
mov al,')'
operror:
push eax
push dword operrmsg
call LogError
fjernneste2:
pop eax
fjernneste:
pop eax
cmp al,255
jz expferdig
cmp al,254
jnz fjernneste2
jmp short fjernneste
exp_ok:
stosb
push edi
mov edi,[nextexpr] ; Store field info
push edi
mov eax,[esp+12]
stosw
mov eax,[asmofs]
stosd
mov eax,[linenbr]
stosd
mov eax,[curpc]
movzx ecx,byte [thumbflag]
shl ecx,3
sub ecx,8
sub eax,ecx
stosd
pop edi
call calcexp2
pop eax
jae expferdig
mov [nextexpr],eax ; Put calculation on hold if unsuccessful
mov [edi+14],eax
expferdig:
dec esi
mov ecx,edi
mov edi,esi
ret 4
synerror:
push dword synerrmsg
call LogError
jmp short fjernneste
expslutt:
pop eax
cmp al,255
jz exp_ok
cmp al,254
jz synerror
pop eax
stosb
jmp short expslutt

calcexp:
mov edx,[edi+6]
mov [linenbr],edx
calcexp2:
push ebx
push esi
mov ebx,esp
lea esi,[edi+18]
calcloop:
xor eax,eax
lodsb
test al,al
jns calcop
cmp al,255 ; End of calculation
jz calcok
cmp al,128 ; Check label or number
jnz notcalcimm
lodsd
push eax
jmp short calcloop
notcalcimm:
xor eax,eax
lodsw
shl eax,4
add eax,labeltbl
test byte [eax+5],1
jz calcfail ; Forward reference
push dword [eax+8]
jmp short calcloop
calcop:
xchg edx,eax
pop ecx
pop eax
call [opfunc+edx*4-4]
push eax
jmp short calcloop
calcok:
pop eax
push eax
xor edx,edx
movzx ecx,byte [edi]
inc edx
cmp cl,4
jb simplefield
call [fieldfunc-16+ecx*4] ; Format field
jb fielderr
jmp short fieldok2
simplefield:
jecxz fieldok
mov cl,[fieldtbl-1+ecx]
shl edx,cl
cmp eax,edx
jb fieldok
fielderr:
push dword bounderrstr
call LogError
pop eax
fieldok:
mov cl,ch
shl eax,cl
fieldok2:
or ebp,eax
pop eax
clc
jmp short calcsuccess
calcfail:
stc
calcsuccess:
mov esp,ebx
pop esi
pop ebx
ret
bounderrstr db "Inappropriate number - 0x%X",0
opfunc dd opor,opxor,opand,oplsl,oplsr,opadd,opsub,opmul,opdiv,oprem
opor:
or eax,ecx
ret
opxor:
xor eax,ecx
ret
opand:
and eax,ecx
ret
oplsl:
shl eax,cl
ret
oplsr:
shr eax,cl
ret
opadd:
add eax,ecx
ret
opsub:
sub eax,ecx
ret
opmul:
imul ecx
ret
opdiv:
jecxz diverror
cdq
idiv ecx
ret
oprem:
jecxz diverror
cdq
idiv ecx
xchg edx,eax
ret
diverror:
push dword diverrstr
call LogError
xor eax,eax
ret
diverrstr db "Didn't you learn not to do that in elementary school?",0
; Formatting functions and number input not reproduced.
operrmsg db "Syntax error - %c",0
synerrmsg db "Operand error",0
shifterror db "Shift error",0
operators db "??^?<?+?*/%"
Posted on 2005-01-14 13:09:47 by Sephiroth3
never heard of the dragon book...


Dragon Book - it's the definitive book on compiler design and very worth the price if you're into that sort of thing.

Adding floating point support should be trivial, what part of it are you having problems with, and what kind of problems?


I'm having trouble converting from ascii to binary. Idealy I would write my own procedure, but after looking at some examples it look like that's more work than I want to put into it. I guess I'm going to have to use FpuAtoFL which is distributed with MASM, but that procedure (and it's counterpart FpuFLtoA) is confusing to me because I haven't found any good documentation for them.

Leading unary operators can be handled near the check for the parenthesis. If found, treat it like a normal operator. When it's time to evaluate it, the item to be acted on should be on the top of the operand stack.


Thanks, but I already solved the unary operator problem (as stated in my previous post). I belive you may be wrong in that statement anyway. Durring a infix to postfix conversion, the operands are not on the stack, but rather the operators are. The operands are output as soon as they're seen. Perhaps you ment that statement to refer to the evaluation process durring which the operands are pushed to the stack and the operators are handled immediately.

Spara
Posted on 2005-01-14 14:30:41 by Sparafusile
I just checked the Dragon Book and it said that, in the case of of a unary operator that is also a binary operator (minus sign in this case), the operator is "unary if the previous token was an operator, a left parenthesis, a comma, or an assignment symbol" (using the Fortran language for an example).

A simpler rule is that an operator is binary if the symbol to the left is an operand (number or variable or right bracketing symbol). Fewer comparisons. This rule and the one cited above don't work if there are postfix operators.
Posted on 2005-01-14 21:56:37 by tenkey
here's a very simple infix to postfix program in pascal, in a nutshell
the lowest function calls the next-higher precedence function and so on.



program postfix(input,output);

const n=255;
num_of_fns=18;

type str=string[n];

var expr,token,fns:str;
i,count:integer;
fns1:array[1..num_of_fns] of string[5];

procedure initfns;

begin
fns1[1]:='ASINH';fns1[2]:='ACOSH';fns1[3]:='ATANH';
fns1[4]:='SINH';fns1[5]:='COSH';fns1[6]:='TANH';
fns1[7]:='ASIN';fns1[8]:='ACOS';fns1[9]:='ATAN';
fns1[10]:='ALOG';fns1[11]:='SQRT';fns1[12]:='SIN';
fns1[13]:='COS';fns1[14]:='TAN';fns1[15]:='LOG';
fns1[16]:='EXP';fns1[17]:='SQR';fns1[18]:='LN';
end;

function upper(ch:char):char;
begin
if (ch>='a') and (ch<='z') then
upper:=chr(ord(ch)+(ord('A')-ord('a')))
else upper:=ch;
end; { upper }

procedure ucase(var x:str);
var i:integer;
begin
for i:=1 to length(x) do x[i]:=upper(x[i]);
end;

procedure modcheck(var expr:str);
var i:integer;
s:str;
begin
i:=1;
while (i<length(expr)) do begin
s:=copy(expr,i,5);
ucase(s);;
if (s=' MOD ') then begin
delete(expr,i,5);
insert('%',expr,i);
end;
i:=i+1;
end;
end;

procedure fncheck(var expr:str);
var i,p,l:integer;
f:str;

begin
ucase(expr);
for i:=1 to num_of_fns do begin
f:=fns1[i];
l:=length(f);
f:=concat(f,'(');
p:=pos(f,expr);
while p<>0 do begin
delete(expr,p,l);
insert(chr(i),expr,p);
p:=pos(f,expr);
end;
end;
end;

procedure removeblanks(var expr:str);

var ch:char;
i:integer;

begin
i:=1;
while i<=length(expr) do begin
ch:=expr[i];
if ch=' ' then delete(expr,i,1);
if ch<>' ' then i:=i+1;
end;
end;

procedure scan;

begin
if count<=length(expr) then begin
token:=expr[count];
count:=count+1;
end
else token:=' ';
end;

procedure error;
begin
expr:=' ';
token:=' ';
end;

function indentifier(var token:str):boolean;
var i,t:boolean;
begin
i:=(token[1] in ['A'..'Z','a'..'z']);
t:=i;
while t do begin
if (expr[count] in ['_','A'..'Z','a'..'z','0'..'9']) then begin
token:=token+expr[count];
count:=count+1;
end
else t:=not t;
end;
indentifier:=i;
end;

function constant(var token:str):boolean;
var c,t:boolean;
begin
t:=(token[1] in ['0'..'9']);
c:=t;
while c do begin
c:=(expr[count] in ['0'..'9']);
if c then begin
token:=token+expr[count];
count:=count+1;
end;
end;
constant:=t;
end;

procedure variable;
begin
if indentifier(token) then begin
write(token,' ');
scan;
end
else write('ERROR: missing VARIABLE ');
end;

procedure expression;
var t:char;

procedure unary;
var t:char;

procedure term;
var t:char;

procedure expon;

procedure gamma;
var t:char;

procedure factor;
var t:integer;

begin
if constant(token) then begin
write(token,' ');
scan;
end
else if (token[1] in ['+','-']) then unary
else if token='(' then begin
scan;
expression;
if token=')' then scan
else write('ERROR: missing ',chr(39),')',chr(39),' ');
end
else if (token[1] in [chr(1)..chr(num_of_fns)]) then begin
fns:=fns+token[1];
scan;
if token[1]='(' then begin
scan;
expression;
end
else begin
writeln;writeln(chr(39),'(',chr(39),' expected');
end;
if token[1]=')' then scan
else begin
writeln;writeln(chr(39),')',chr(39),' expected');
end;
t:=ord(fns[length(fns)]);
delete(fns,length(fns),1);
write(fns1[t],' ');
end
else variable;
end; { factor }

begin { gamma }
factor;
while token[1]='!' do begin
write('! ');
scan;
end;
end; { gamma }

begin { expon }
gamma;
while token[1]='^' do begin
scan;
if token[1]<>'^' then begin
gamma;
write('^ ');
end
else begin
writeln('syntax error');
error;
end;
end;
end; { expon }

begin { term }
expon;
while (token[1] in ['*','/','%']) do begin
t:=token[1];
scan;
expon;
case t of
'*':write('* ');
'/':write('/ ');
'%':write('mod ');
end; { case }
end;
end; { term }

begin { unary }
if token[1] in ['-','+'] then begin
t:=token[1];
scan;
term;
if t='-' then write('NEG ');
end
else term;
end; { unary }


begin { expression }
unary;
while (token[1]='-') or (token[1]='+') do begin
t:=token[1];
scan;
unary;
case t of
'+':write('+ ');
'-':write('- ');
end; { case }
end;
end; { exxpression }

procedure assignment;
begin
variable;
if token[1]='=' then scan else write('ERROR: no ASSIGNMENT ');
expression;
writeln('STORE');
end;

begin { postfix }
initfns;
expr:=' ';
while length(expr)>0 do begin
count:=1;for i:=1 to n do expr[i]:=' ';
fns:='';
write('Enter an expression: ');
readln(expr);
modcheck(expr);
fncheck(expr);
removeblanks(expr);
if length(expr)>0 then begin
scan;
if indentifier(token) and (expr[count]='=') then assignment
else begin
expression;
writeln;
end;
end;
writeln;
end;
end.
Posted on 2005-01-15 19:49:34 by jack
Yes, and the parsing strategy coded in Pascal is called recursive descent.

In ASM, the nested procedures are unnecessary. The nesting gets around Pascal's declare-before-use rule, without using the common FORWARD extension.
Posted on 2005-01-16 18:15:33 by tenkey
Sorry, but none of this is new to me. I've been writing recursive decent analyzers for years. What I do need some help on is converting the ASCII representation of a floating point number into the binary representation. Any hints would be appreciated.

Spara
Posted on 2005-01-17 18:19:46 by Sparafusile
You should first know how to translate 2.2 to binary if Im not wrong is 10.00110 and then a infinite repetition of 0110 then you should apply Cientific notation some like for example (if Im not wrong ;)) 1000110*10^(-101) there is a special way and is the called normalization denormalization is somewhat the notation cientific with some considerations. That is all that I can say about that :D, by the way, because you can not have infinite representations like the anterior one, the information is "packed" in acordance to a standar.

http://www.asmcommunity.net/board/viewtopic.php?t=16687&highlight=ieee+float

By the way, the assemblers should have some algorithms implemented at this side.

You can look at nasm, yasm or fasm...



By the way, I remember that the yasm transformations are from other source, where interesting and I where unable to undertand some things :).
Posted on 2005-01-17 22:42:04 by rea