I want to extract a series of bits, for example with the following function prototype:

function extbit(x,start,step)
If x = 11011100, start = 1, step = 3, it means that it will extract the bits at indices 1, 4, 7, so the result is 110.

Or the other function prototype, if it is possible with the same performance:

function extbit(x,mask)
If x = 11011100, mask = 10010010, this will give the same result as the function above, which is 110.

I need x to be a 32-bit number.

Is there a fast way to implement these two functions (in asm or C)? I don't care about size; I just want the highest speed.

thank you,
Doby
Posted on 2002-06-11 11:58:50 by doby


.code
;-----------------------------------------------------------------------
; getbits - gather bits start, start+step, start+2*step, ... (all < 32)
;           of bitString and pack them into the low bits of the result.
;
; In:    bitString = 32-bit source value
;        start     = index of the first bit to gather (bit 0 = LSB)
;        step      = distance between gathered bits
; Out:   eax       = gathered bits; the bit taken from 'start' ends up
;                    in bit 0, the next one in bit 1, and so on.
;                    Returns 0 when start > 31 or step = 0.
; Clobb: ecx, edx, flags (ebx is saved and restored)
;
; The earlier version looked the final shift count up in a table indexed
; by step. The number of gathered bits also depends on start, so the
; count is now tracked in the loop and the table is no longer needed.
;-----------------------------------------------------------------------
getbits PROC bitString:DWORD, start:DWORD, step:DWORD
        push    ebx

        xor     eax, eax                ; eax = result accumulator
        xor     ebx, ebx                ; ebx = number of bits gathered
        mov     ecx, start              ; ecx = current bit index
        cmp     ecx, 32
        jae     done                    ; no bit at or above index 32
        cmp     step, 0
        je      done                    ; step 0 would never leave the loop
        mov     edx, bitString

@@:
        bt      edx, ecx                ; CF = bitString[ecx]
        rcr     eax, 1                  ; push CF in at bit 31
        inc     ebx
        add     ecx, step
        cmp     ecx, 32
        jb      @B                      ; unsigned: the index is never negative

        ; The ebx gathered bits sit in the top of eax, first bit lowest.
        ; Shift them down so the first gathered bit lands in bit 0.
        ; ebx is 1..32, so the count is 0..31 and is never masked.
        mov     ecx, 32
        sub     ecx, ebx
        shr     eax, cl

done:
        pop     ebx
        ret
getbits ENDP


Untested, off-the-top-of-my-head stuff here...

Mirno
Posted on 2002-06-11 13:20:16 by Mirno
;-----------------------------------------------------------------------
; getbits - gather bits start, start+step, start+2*step, ... (all < 32)
;           of bitString and pack them into the low bits of the result.
;
; In:    bitString = 32-bit source value
;        start     = index of the first bit to gather (bit 0 = LSB)
;        step      = distance between gathered bits
; Out:   eax       = gathered bits, the bit taken from 'start' in bit 0.
;                    Returns 0 when start > 31 or step = 0.
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
getbits PROC bitString:DWORD, start:DWORD, step:DWORD

        xor     eax, eax                ; eax = result accumulator
        mov     edx, start              ; edx = current bit index
        cmp     edx, 32
        jae     done                    ; keeps the memory-form BT inside bitString
        cmp     step, 0
        je      done                    ; step 0 would never leave the loop
        xor     ecx, ecx                ; ecx = number of bits gathered
@@:
        bt      bitString, edx          ; CF = bitString[edx], edx is 0..31 here
        rcr     eax, 1                  ; push CF in at bit 31
        inc     ecx
        add     edx, step
        cmp     edx, 32
        jb      @B                      ; unsigned: the index is never negative

        ; The ecx gathered bits sit in the top of eax. Rotating left by
        ; that many positions brings the first gathered bit to bit 0.
        ; A count of 32 is masked to 0, which is correct for a full word.
        rol     eax, cl
done:
        ret
getbits ENDP
Posted on 2002-06-11 13:46:30 by bitRAKE
Hmm, it seems there is no trick to do this in just two or three operations, or without a loop.

Thank you very much for all the answers,
Doby.
Posted on 2002-06-12 01:21:36 by doby


;-----------------------------------------------------------------------
; Gather bits start, start+step, start+2*step, ... of [x] into eax,
; with the first gathered bit in bit 0. The loop stops as soon as no
; set bits remain in the source, so high zero bits cost nothing.
;
; In:    [x], [start], [step]  (dword variables)
; Out:   eax = gathered bits
; Clobb: ebx, ecx, edx, flags
; Needs: 0 <= start <= 31 and 1 <= step <= 31. Shift counts are taken
;        mod 32, and a count of 0 leaves the flags untouched, so the
;        loop exit test would not see the shift result.
;-----------------------------------------------------------------------
        mov     edx, [x]
        mov     ecx, [start]
        xor     ebx, ebx                ; ebx = bits rotated into eax so far
        xor     eax, eax
        shr     edx, cl                 ; bring bit 'start' down to bit 0 ...
        shr     edx, 1                  ; ... and out into CF. Done as two shifts:
                                        ; one shift by start+1 is a count of 32
                                        ; (masked to 0) when start = 31.
        mov     ecx, [step]             ; mov leaves CF alone
@@:     rcr     eax, 1                  ; push the pending bit in at bit 31
        inc     ebx
        shr     edx, cl                 ; CF = next bit, ZF = source exhausted
        jne     @B
        ; CF still holds one gathered bit. Treating CF:eax as a 33-bit
        ; ring, rotate left by ebx+1 in total so the first bit reaches bit 0.
        rcl     eax, 1
        mov     ecx, ebx                ; mov leaves CF alone between the two rcl
        rcl     eax, cl
Posted on 2002-06-12 02:12:29 by Nexo
If you know how many bits you are gathering:


;-----------------------------------------------------------------------
; Unrolled gather for a known bit count: collects [bits] bits of [x],
; taken at start, start+step, start+2*step, ..., into eax with the first
; gathered bit in bit 0.
;
; In:    [x], [start], [step], [bits]  (dword variables)
; Out:   eax = gathered bits
; Clobb: ebx, ecx, edx, flags
; Needs: 1 <= bits <= 32, 0 <= start <= 31, 1 <= step <= 31.
;        Any other bit count makes the computed jump land outside the
;        unrolled block.
;
; The jump skips 32-bits of the 31 unrolled rcr/shr pairs, leaving bits-1
; of them to run. The 4*ebx scaling assumes each pair assembles to
; 4 bytes (2-byte rcr eax,1 plus 2-byte shr edx,cl); check the listing.
;-----------------------------------------------------------------------
        mov     ebx, 32
        sub     ebx, [bits]
        lea     ebx, [gather+4*ebx]     ; entry point inside the unrolled block
        xor     eax, eax                ; clear the result before CF is loaded:
                                        ; xor would wipe the first gathered bit
        mov     edx, [x]
        mov     ecx, [start]
        shr     edx, cl                 ; bring bit 'start' down to bit 0 ...
        shr     edx, 1                  ; ... and out into CF. Done as two shifts:
                                        ; one shift by start+1 is a count of 32
                                        ; (masked to 0) when start = 31.
        mov     ecx, [step]             ; the unrolled shifts advance by step;
                                        ; mov leaves CF alone
        jmp     ebx
gather:
REPT 31
        rcr     eax, 1                  ; push the pending bit in at bit 31
        shr     edx, cl                 ; CF = next bit
ENDM
        ; CF still holds the last gathered bit. Treating CF:eax as a
        ; 33-bit ring, rotate left by [bits] in total.
        rcl     eax, 1
        mov     ecx, [bits]
        dec     ecx                     ; dec, not sub: CF must survive to the rcl
        rcl     eax, cl
        ret

With the loop, clocks = 5 + 2.4*bits; without the loop, clocks = 8 + bits.
BT is very slow (clocks = 3 + 10.25*bits), but with the BT reg,reg form
clocks = 3 + 3.25*bits.
Posted on 2002-06-12 03:58:17 by Nexo
A solution with the mask. It's not fast I think (too many dependencies.)



;-----------------------------------------------------------------------
; ExtBit - collect the bits of 'bits' selected by the set bits of
;          '_mask' and pack them contiguously, lowest selected bit first.
;
; In:    bits  = 32-bit source value
;        _mask = 1 in every position whose bit is to be extracted
; Out:   eax   = packed result (lowest selected bit in bit 0)
; Clobb: ecx, edx, flags (ebx and esi are saved and restored via USES)
;-----------------------------------------------------------------------
ExtBit PROC USES ebx esi bits:DWORD, _mask:DWORD

        mov     esi, _mask              ; esi = mask bits not yet examined
        mov     ebx, bits
        and     ebx, esi                ; ebx = selected source bits only
        xor     ecx, ecx                ; ecx = output position of the next bit
        xor     eax, eax                ; eax = result

next_bit:
        xor     edx, edx
        shr     ebx, 1                  ; CF = current source bit
        setc    dl                      ; edx = that bit (0 or 1)
        shl     edx, cl                 ; move it to its output position
        shr     esi, 1                  ; CF = current mask bit
        adc     ecx, 0                  ; advance the position only for mask bits
        or      eax, edx                ; merge the bit into the result
        test    ebx, ebx                ; stop once no selected bits remain
        jnz     next_bit

        ret

ExtBit ENDP
Posted on 2002-06-12 15:23:44 by Dr. Manhattan
A shorter solution with the mask:


;-----------------------------------------------------------------------
; ExtBit - collect the bits of '_bits' selected by the set bits of
;          '_mask' and pack them contiguously. Scans from bit 31 down,
;          so the lowest selected bit ends up in bit 0 of the result.
;
; In:    _bits = 32-bit source value
;        _mask = 1 in every position whose bit is to be extracted
; Out:   eax   = packed result
; Clobb: ecx, edx, flags (ebx is saved and restored via USES)
;-----------------------------------------------------------------------
ExtBit PROC USES ebx _bits:DWORD, _mask:DWORD

        mov     edx, _bits
        mov     ebx, _mask              ; ebx = mask bits not yet examined
        and     edx, ebx                ; edx = selected source bits only
        xor     ecx, ecx                ; upper ecx stays 0; only cl is written below
        xor     eax, eax                ; eax = result

scan:
        test    ebx, ebx
        sets    cl                      ; cl = 1 when the top mask bit is set
        shld    eax, edx, cl            ; shift in the top source bit, or nothing
        add     edx, edx                ; next source bit to the top
        add     ebx, ebx                ; next mask bit to the top; ZF when exhausted
        jnz     scan

        ret

ExtBit ENDP
Posted on 2002-06-15 09:08:38 by Dr. Manhattan