Hi,

I'm taking a course this semester on assembly and part of the second assignment is to convert an array of characters to lowercase.
I was taught in class that you can do so by ORing the string with 0010 0000b, and this is what I am doing on the fragment of code below:
	or al, 0x20	; patern to convert to lowercase: 0010 0000 = 0x20

However, when I try to run the program, I get the following error:
Segmentation fault

I tried XOR and it doesn't give me that error, but it also does not give me the desired result (uppercase letters become lowercase and lowercase letters become uppercase). I'm using NASM on Linux (Intel).
Any thoughts?

Thanks in advance.
Posted on 2006-10-15 21:16:47 by Pensador
The error is elsewhere, not in that OR. You can post the entire source (if none of the moderators object).

By the way, a clearer way to convert to upcase than or al, 20h is add al, 'A'-'a'
Posted on 2006-10-16 01:45:56 by vid

The error is elsewhere, not in that OR. You can post the entire source (if none of the moderators object).

By the way, a clearer way to convert to upcase than or al, 20h is add al, 'A'-'a'


I didn't try using add because I'd need to log on to Linux. Here it goes:


; Description
; -----------
; This program converts two input strings to lowercase, removes all blanks and sets the
; variable PRE to 1 if the second string is a prefix of the first one, otherwise it sets
; PRE to 0.

section .data
SOURCE:         db      'This is a Big Endeavour', 0x0A     ; first input string, ends with a newline byte
SOURCE_len      equ     $ - SOURCE                          ; size of SOURCE in bytes, newline included

TARGET:         db      'Is a big?', 0x0A                   ; second input string, ends with a newline byte
TARGET_len      equ     $ - TARGET                          ; size of TARGET in bytes, newline included

section .bss
SM:             resb    24                                  ; 24-byte buffer reserved for the processed SOURCE
TM:             resb    24                                  ; 24-byte buffer reserved for the processed TARGET

segment .text
global _start

; Copies SOURCE to SM and TARGET to TM, leaving out blanks. The case
; conversion itself is commented out in both loops.
; Register use: esi = read index, edi = write index, al = current byte.
; The bracketed memory operands of the four load/store instructions were
; missing from the posted text and are restored here; the remaining
; instructions are unchanged.
_start:
mov esi, 0 ; read index into SOURCE
mov edi, 0 ; write index into SM

start_SOURCE_convert:
mov al, [SOURCE + esi] ; al = next input byte

mov bl, 0x20
cmp al, bl ; If this is a space,
je end_SOURCE_convert ; do not copy it to 'SM'.

;or al, 0x20 ; pattern to convert to lowercase: 0010 0000 = 0x20
mov [SM + edi], al ; append the byte to the output
inc edi

end_SOURCE_convert:
inc esi

mov bx, 0x0A ; only bl is compared below; the write to bh is incidental
; NOTE(review): the test below reads al after the (currently disabled) OR.
; With the OR enabled, 0x0A becomes 0x2A, the test never succeeds and the
; loop keeps reading and writing past the ends of the buffers.
cmp al, bl ; Is this the end of the string?
jne start_SOURCE_convert ; If not, process the next byte.

; Prepare for next step
mov esi, 0 ; read index into TARGET
mov edi, 0 ; write index into TM


start_TARGET_convert:
mov al, [TARGET + esi] ; al = next input byte

mov bx, 0x20 ; only bl is compared below
cmp al, bl ; If this is a space,
je end_TARGET_convert ; do not copy it to 'TM'.

;xor ax, 0x20 ; pattern to convert to lowercase: 0010 0000 = 0x20
mov [TM + edi], al ; append the byte to the output
inc edi

end_TARGET_convert:
inc esi

mov bx, 0x0A ; only bl is compared below
cmp al, bl ; Is this the end of the string? (same caveat as in the SOURCE loop)
jne start_TARGET_convert ; If not, process the next byte.


print:
; Issues four int 0x80 calls with eax = 4 and ebx = 1, then one with eax = 1.
; NOTE(review): under the 32-bit Linux int 0x80 convention eax = 4 is write
; (ebx = file descriptor, ecx = buffer address, edx = byte count) and
; eax = 1 is exit (ebx = status) - confirm against the syscall table.

; print SM
mov eax, 4
mov ebx, 1
mov ecx, SM ; address of the SM buffer
mov edx, 24 ; the full 24 bytes reserved for SM, not just the bytes that were filled
int 0x80

; print new line
mov eax, 4
mov ebx, 1
mov ecx, 0x0A ; NOTE(review): this loads the value 0x0A itself, not the address of a byte holding 0x0A
mov edx, 1
int 0x80

; print TM
mov eax, 4
mov ebx, 1
mov ecx, TM ; address of the TM buffer
mov edx, 24 ; the full 24 bytes reserved for TM
int 0x80

; print new line
mov eax, 4
mov ebx, 1
mov ecx, 0x0A ; NOTE(review): same as above - an immediate value where an address is expected
mov edx, 1
int 0x80

exit:
mov eax, 1 ; ebx is not reloaded here, so it still holds the 1 set for the last call above
int 0x80
Posted on 2006-10-16 07:35:41 by Pensador
Nope, using add didn't give me the desired results:
4HISISA"IG%NDEAVOUR?)SABIsabig?
)3)3!)'.$%!6/5

Probably because it's trying to convert letters that were already lowercase...
Posted on 2006-10-16 08:07:23 by Pensador
section .data
SOURCE:         db      'This is a Big Endeavour', 0x0A     ; first input string, ends with a newline byte
SOURCE_len      equ     $ - SOURCE                          ; size of SOURCE in bytes, newline included

TARGET:         db      'Is a big?', 0x0A                   ; second input string, ends with a newline byte
TARGET_len      equ     $ - TARGET                          ; size of TARGET in bytes, newline included
nl:             db      0x0A                                ; one newline byte that can be addressed in memory

section .bss
SM:             resb    24                                  ; 24-byte buffer reserved for the processed SOURCE
TM:             resb    24                                  ; 24-byte buffer reserved for the processed TARGET

segment .text
global _start

SPACE           equ     0x20            ; ASCII blank: never copied to the output
NEWLINE         equ     0x0A            ; last byte of each input string
CASE_BIT        equ     0x20            ; 0010 0000b: OR-ing it in maps 'A'..'Z' onto 'a'..'z'

;-----------------------------------------------------------------------
; _start - copy SOURCE into SM and TARGET into TM, dropping blanks and
;          OR-ing CASE_BIT into every byte that is copied.
; Target:  32-bit x86, NASM syntax.
; Regs:    esi = read index, edi = write index,
;          al  = byte being converted, dl = the same byte before conversion.
; Notes:   * Each loop ends after the NEWLINE byte, which is itself copied
;            to the output as NEWLINE | CASE_BIT = 0x2A.
;          * The OR is applied to every non-blank byte, so characters
;            outside 'A'..'Z' whose bit 5 is clear (e.g. '@', '[') are
;            altered as well.
;          * Execution continues with the code that follows this block.
;-----------------------------------------------------------------------
_start:
        xor     esi, esi                ; read index into SOURCE
        xor     edi, edi                ; write index into SM

start_SOURCE_convert:
        mov     al, [SOURCE + esi]      ; al = next input byte
        mov     dl, al                  ; saved before the blank test so the end
                                        ; test never sees a stale value
        cmp     al, SPACE
        je      end_SOURCE_convert      ; blanks are not copied to SM

        or      al, CASE_BIT            ; force lowercase
        mov     [SM + edi], al
        inc     edi

end_SOURCE_convert:
        inc     esi
        cmp     dl, NEWLINE             ; test the unconverted byte: al may hold 0x2A by now
        jne     start_SOURCE_convert

        ; Prepare for the second string
        xor     esi, esi                ; read index into TARGET
        xor     edi, edi                ; write index into TM

start_TARGET_convert:
        mov     al, [TARGET + esi]      ; al = next input byte
        mov     dl, al                  ; overwrites the NEWLINE left in dl by the
                                        ; first loop, even when this byte is a blank
        cmp     al, SPACE
        je      end_TARGET_convert      ; blanks are not copied to TM

        or      al, CASE_BIT            ; force lowercase
        mov     [TM + edi], al
        inc     edi

end_TARGET_convert:
        inc     esi
        cmp     dl, NEWLINE             ; test the unconverted byte
        jne     start_TARGET_convert


; write_stdout ADDRESS, COUNT
; Expands to one int 0x80 call with eax = 4, ebx = 1, ecx = ADDRESS and
; edx = COUNT. Overwrites eax, ebx, ecx and edx.
%macro write_stdout 2
        mov     eax, 4
        mov     ebx, 1
        mov     ecx, %1
        mov     edx, %2
        int     0x80
%endmacro

print:
        write_stdout SM, 19             ; 19 = non-blank characters of SOURCE before its newline
        write_stdout nl, 1              ; line break
        write_stdout TM, 7              ; 7 = non-blank characters of TARGET before its newline
        write_stdout nl, 1              ; line break

; Terminate the process through the 32-bit Linux int 0x80 interface
; (eax = 1 is exit, ebx = exit status).
exit:
        xor     ebx, ebx                ; status 0; without this ebx still holds the 1
                                        ; loaded for the preceding write call
        mov     eax, 1
        int     0x80
Posted on 2006-10-16 08:24:58 by arafel
Hi arafel!

I noticed a typo in my code: in a couple of places I used ax and bx when I should have been using al and bl.

Your code works fine!

I see that you allocated space for the new line. If I understand well, you also stored the character on the side in the register dl so that the new line at the end of the strings doesn't get converted. Is that how you fixed it?

Thanks!
Posted on 2006-10-16 09:52:10 by Pensador
Is that how you fixed it?

Basically yes. But I guess there are some other ways, like comparing al to both 0xa and 0x2a and, if it matches neither of them, jumping to start_SOURCE_convert.
By the way, you could use immediate operands with the cmp instruction directly, i.e. cmp dl, 0xa, so you don't have to mov 0xa and 0x20 into bl each time.
Posted on 2006-10-16 11:21:56 by arafel