Hi there,

i have an idea of making an indexing tool to index all words in a html document or something like that. But the point is i want to get this indexing algorithm working fast and good, i self think it's a good algo but it won't work yet because i'm not enough experienced in asm. The idea is to skip everything between the '<' and '>' tags and get the words that are separated by blank spaces (' '). Like you can see below I can't filter well these to be indexed words from the html to the allocated memory and write it to an index file. But it will work with some assistent of you folks!! Here it comes:


[code]
.386
.model flat,stdcall
option casemap:none
WinMain proto :DWORD,:DWORD,:DWORD,:DWORD
include \masm32\include\windows.inc
include \masm32\include\user32.inc
include \masm32\include\kernel32.inc
include \masm32\include\comdlg32.inc
includelib \masm32\lib\user32.lib
includelib \masm32\lib\kernel32.lib
includelib \masm32\lib\comdlg32.lib

.const
IDM_OPEN equ 1
IDM_EXIT equ 2
MAXSIZE equ 260

.data
ClassName db "Index",0
AppName db "Index",0
MenuName db "FirstMenu",0
ofn OPENFILENAME <>
FilterString db "All Files",0,"*.*",0
db "Text Files",0,"*.txt",0,0
buffer db "c:\index.htm",0
buffer2 db "c:\index2.htm",0
hMapFile HANDLE 0

.data?
hInstance HINSTANCE ?
CommandLine LPSTR ?
hFileRead HANDLE ?
hFileWrite HANDLE ?
hMenu HANDLE ?
pMemory DWORD ?
SizeWritten DWORD ?

mijnPointer DWORD ?
fileSize DWORD ?
hMem dd ?
pMem dd ?

.code
start:
invoke GetModuleHandle, NULL
mov hInstance,eax
invoke GetCommandLine
mov CommandLine,eax
invoke WinMain, hInstance,NULL,CommandLine, SW_SHOWDEFAULT
invoke ExitProcess,eax

WinMain proc hInst:HINSTANCE,
hPrevInst:HINSTANCE,
CmdLine:LPSTR,
CmdShow:DWORD
LOCAL wc:WNDCLASSEX
LOCAL msg:MSG
LOCAL hwnd:HWND
mov wc.cbSize,SIZEOF WNDCLASSEX
mov wc.style, CS_HREDRAW or CS_VREDRAW
mov wc.lpfnWndProc, OFFSET WndProc
mov wc.cbClsExtra,NULL
mov wc.cbWndExtra,NULL
push hInst
pop wc.hInstance
mov wc.hbrBackground,COLOR_WINDOW+1
mov wc.lpszMenuName,OFFSET MenuName
mov wc.lpszClassName,OFFSET ClassName
invoke LoadIcon,NULL,IDI_APPLICATION
mov wc.hIcon,eax
mov wc.hIconSm,eax
invoke LoadCursor,NULL,IDC_ARROW
mov wc.hCursor,eax
invoke RegisterClassEx, addr wc
invoke CreateWindowEx,WS_EX_CLIENTEDGE,ADDR ClassName,
ADDR AppName,WS_OVERLAPPEDWINDOW,
CW_USEDEFAULT, CW_USEDEFAULT,
300,200,NULL,NULL,hInst,NULL
mov hwnd,eax
invoke ShowWindow, hwnd,SW_SHOWNORMAL
invoke UpdateWindow, hwnd
.WHILE TRUE
invoke GetMessage, ADDR msg,NULL,0,0
.BREAK .IF (!eax)
invoke TranslateMessage, ADDR msg
invoke DispatchMessage, ADDR msg
.ENDW
mov eax,msg.wParam
ret
WinMain endp
WndProc proc hWnd:HWND, uMsg:UINT, wParam:WPARAM, lParam:LPARAM
.IF uMsg==WM_CREATE
invoke GetMenu,hWnd
mov hMenu,eax
.ELSEIF uMsg==WM_DESTROY
.if hMapFile!=0
invoke CloseHandle,hMapFile
mov hMapFile,0
invoke CloseHandle,hFileRead
.endif
invoke PostQuitMessage,NULL
.ELSEIF uMsg==WM_COMMAND
mov eax,wParam
.if lParam==0
.if ax==IDM_OPEN
invoke EnableMenuItem,hMenu,IDM_OPEN,MF_GRAYED
invoke CreateFile,ADDR buffer,GENERIC_READ,0,NULL,OPEN_EXISTING,
FILE_ATTRIBUTE_ARCHIVE,NULL
mov hFileRead,eax
invoke CreateFileMapping,hFileRead,NULL,PAGE_READONLY,0,0,NULL
mov hMapFile,eax
mov eax,OFFSET buffer
movzx edx,ofn.nFileOffset
add eax,edx
invoke SetWindowText,hWnd,eax
invoke CreateFile,ADDR buffer2,GENERIC_READ or GENERIC_WRITE,
FILE_SHARE_READ or FILE_SHARE_WRITE,NULL,
CREATE_NEW,FILE_ATTRIBUTE_ARCHIVE,NULL
mov hFileWrite,eax
invoke MapViewOfFile,hMapFile,FILE_MAP_READ,0,0,0
mov pMemory,eax
mov mijnPointer,eax
invoke GetFileSize,hFileRead,NULL
mov fileSize,eax
mov ecx,fileSize
mov al,"<"
mov edi,mijnPointer
repnz scasb
mov al,">"
repnz scasb
INVOKE GlobalAlloc,0,65535
mov hMem,eax
INVOKE GlobalLock,hMem
mov pMem,eax
mov al,"<"
mov esi,edi
mov edi,pMem
rep movsb
INVOKE GlobalUnlock,pMem
INVOKE GlobalFree,hMem
invoke WriteFile,hFileWrite,pMemory,
fileSize,ADDR SizeWritten,NULL
invoke UnmapViewOfFile,pMemory
invoke CloseHandle,hMapFile
mov hMapFile,0
invoke CloseHandle,hFileRead
invoke CloseHandle,hFileWrite
.else
invoke DestroyWindow, hWnd
.endif
.endif
.ELSE
invoke DefWindowProc,hWnd,uMsg,wParam,lParam
ret
.ENDIF
xor eax,eax
ret
WndProc endp
end start
[/code]

Added the formatting notation "[","code","]" and "" and changed the indenting so it reads more easily.
Posted on 2002-04-23 03:15:49 by eisodur
Have a look at the procedure in the MASM32 library called "StripRangeI" as it will remove HTML tags with no problems. Just set the tweo bytes as "<" and ">" and it will remove everything betwen them and the < >.

Regards,

hutch@movsd.com
Posted on 2002-04-23 07:24:58 by hutch--