Is there a windows function or something that will parse an address?

Take something like "http://www.whatever.com/user/test.zip"
and turn it into...
"http", "whatever.com" and "/user/test.zip"?

I could write it myself, but I was never good at writing parsers.
Posted on 2002-07-15 18:15:48 by ChimpFace9000
Sorry if it looks stupid, but I am really tired :(



; #################################################

.486
.model flat, stdcall
option casemap :none

; #################################################

include \masm32\include\windows.inc
include \masm32\include\user32.inc
include \masm32\include\kernel32.inc

includelib \masm32\lib\user32.lib
includelib \masm32\lib\kernel32.lib

; #################################################

.data
szTest db "http://www.whatever.com/user/test.zip", 0

; #################################################

.data?
szBuffer1 db 128 dup (?)
szBuffer2 db 128 dup (?)
szBuffer3 db 128 dup (?)


; #################################################

.code

start:

; -----------------------------------------------------------------
; Program entry point (MASM32, 32-bit, stdcall).
;
; Splits szTest, a URL of the form  scheme://host/path , into
;   szBuffer1 = scheme   ("http")
;   szBuffer2 = host     ("www.whatever.com")
;   szBuffer3 = path     ("/user/test.zip", leading '/' included;
;                         empty when the URL has no path)
; and shows each part in a message box.  When the "scheme://" prefix
; is missing, nothing is shown and the process simply exits.
;
; Register roles: esi = read cursor in szTest
;                 edi = write cursor in the current output buffer
;                 al  = character being examined
;
; No length checks are made: every component of szTest has to fit
; into its output buffer, terminator included.
; -----------------------------------------------------------------

        push    esi
        push    edi

; --- scheme: everything in front of the first ':' ----------------
        mov     esi, offset szTest
        mov     edi, offset szBuffer1
copy_scheme:
        mov     al, byte ptr [esi]
        inc     esi
        test    al, al
        jz      parse_done              ; end of string before ':' - not a URL
        cmp     al, ':'
        je      scheme_done
        mov     byte ptr [edi], al
        inc     edi
        jmp     copy_scheme
scheme_done:
        mov     byte ptr [edi], 0       ; terminate the scheme

; --- the ':' has to be followed by "//" --------------------------
; Test both characters before skipping them, so a short string such
; as "http:" can never move esi past the terminating zero.
        cmp     byte ptr [esi], '/'
        jne     parse_done
        cmp     byte ptr [esi + 1], '/'
        jne     parse_done
        add     esi, 2

; --- host: up to the next '/' or the end of the string -----------
        mov     edi, offset szBuffer2
copy_host:
        mov     al, byte ptr [esi]
        test    al, al
        jz      host_done               ; URL without a path
        cmp     al, '/'
        je      host_done               ; esi stays on the '/', it opens the path
        mov     byte ptr [edi], al
        inc     esi
        inc     edi
        jmp     copy_host
host_done:
        mov     byte ptr [edi], 0       ; terminate the host

; --- path: the remainder, terminating zero included --------------
        mov     edi, offset szBuffer3
copy_path:
        mov     al, byte ptr [esi]
        mov     byte ptr [edi], al
        inc     esi
        inc     edi
        test    al, al
        jnz     copy_path

        invoke MessageBox, 0, addr szBuffer1, 0, 0
        invoke MessageBox, 0, addr szBuffer2, 0, 0
        invoke MessageBox, 0, addr szBuffer3, 0, 0

parse_done:

        pop     edi
        pop     esi

        invoke ExitProcess, 0           ; ExitProcess takes the exit code as its argument

end start

; #################################################
Posted on 2002-07-15 18:42:18 by bazik
Iczelion has a URL parser in his Download example.
Posted on 2002-07-15 22:53:03 by comrade
In the wininet library, there's a function called InternetCrackUrl, that does exactly what you want:
.386	

.model flat, stdcall

include user32.inc
include kernel32.inc
include wininet.inc

includelib user32.lib
includelib kernel32.lib
includelib wininet.lib

include windows.inc

.data
szUrl db "http://www.whatever.com/user/test.zip",0

.data?
szScheme db INTERNET_MAX_SCHEME_LENGTH dup (?)
szHostName db INTERNET_MAX_HOST_NAME_LENGTH dup (?)
szUserName db INTERNET_MAX_USER_NAME_LENGTH dup (?)
szPassword db INTERNET_MAX_PASSWORD_LENGTH dup (?)
szUrlPath db INTERNET_MAX_PATH_LENGTH dup (?)
szExtraInfo db 255 dup (?)
URLComponents URL_COMPONENTS<>

.code
start:

; -----------------------------------------------------------------
; Program entry point (MASM32, 32-bit, stdcall).
;
; Lets InternetCrackUrl (wininet) split szUrl into its components
; and shows scheme, host name and URL path in message boxes.
; Exits with code 0 on success, or with code 1 (showing nothing)
; when InternetCrackUrl reports failure.
; -----------------------------------------------------------------

; Describe the output buffers.  Every lpsz* field points at a buffer
; and the matching dw*Length field holds that buffer's size.
        mov     URLComponents.dwStructSize, sizeof URL_COMPONENTS
        mov     URLComponents.lpszScheme, offset szScheme
        mov     URLComponents.dwSchemeLength, sizeof szScheme
        mov     URLComponents.nScheme, INTERNET_SCHEME_DEFAULT
        mov     URLComponents.lpszHostName, offset szHostName
        mov     URLComponents.dwHostNameLength, sizeof szHostName
        mov     URLComponents.nPort, 0
        mov     URLComponents.lpszUserName, offset szUserName
        mov     URLComponents.dwUserNameLength, sizeof szUserName
        mov     URLComponents.lpszPassword, offset szPassword
        mov     URLComponents.dwPasswordLength, sizeof szPassword
        mov     URLComponents.lpszUrlPath, offset szUrlPath
        mov     URLComponents.dwUrlPathLength, sizeof szUrlPath
        mov     URLComponents.lpszExtraInfo, offset szExtraInfo
        mov     URLComponents.dwExtraInfoLength, sizeof szExtraInfo

        invoke lstrlen, addr szUrl      ; eax = length of the URL
        invoke InternetCrackUrl, addr szUrl, eax, ICU_DECODE, addr URLComponents
        test    eax, eax                ; FALSE (0) = the URL could not be cracked
        jz      crack_failed

        invoke MessageBox, 0, addr szScheme, 0, 0 ;protocol scheme name - 'http'
        invoke MessageBox, 0, addr szHostName, 0, 0 ;hostname - 'www.whatever.com'
        invoke MessageBox, 0, addr szUrlPath, 0, 0 ;url path - '/user/test.zip'

        invoke ExitProcess, 0

crack_failed:
        invoke ExitProcess, 1           ; nothing useful to show - report failure via exit code

end start
For more info see: http://msdn.microsoft.com/library/default.asp?url=/workshop/networking/wininet/reference/functions/internetcrackurl.asp
Posted on 2002-07-16 06:21:22 by savage
Isn't it a bit overkill to use a API for that?

If you want REAL overkill, use Instr & Mid functions from Masm32.lib :tongue:
Posted on 2002-07-16 06:36:14 by bazik
ChimpFace9000 was asking if there was a windows function for it. And InternetCrackUrl does exactly that.

It is indeed kinda overkill
but hey, it does the job :grin:
Posted on 2002-07-16 06:44:10 by savage
Hehe :)

Well, I posted this source because I am sure he wants to learn more about parsing. Basically, you just copy bytes until you find some sort of "delimiter". Then you skip it and carry on copying bytes (into another buffer, of course).

I know that my code above has some room for improvement, because the basic loop is repeated. But I prefer it that way.
It's easy to understand, looks clear, doesn't use any "strange" instructions and is a lot faster than the same function in *any* HLL :)

Just my €0.02 worth...
Posted on 2002-07-16 06:55:23 by bazik
Hello, I tried translating it to C, but it seems I missed something: InternetCrackUrl returns 0 and GetLastError says one of my parameters is incorrect. Here is the source:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <wininet.h>

#define MAXBUF 255
URL_COMPONENTSA url;
char buffer[200]={0};

main()
{
url.dwStructSize = sizeof(URL_COMPONENTSA);
url.lpszScheme=INTERNET_MAX_SCHEME_LENGTH;
url.dwSchemeLength=sizeof(INTERNET_MAX_SCHEME_LENGTH);
url.nScheme=INTERNET_SCHEME_DEFAULT;
url.lpszHostName=INTERNET_MAX_HOST_NAME_LENGTH;
url.dwHostNameLength=sizeof(INTERNET_MAX_HOST_NAME_LENGTH);
url.nPort =0;
url.lpszUserName=INTERNET_MAX_USER_NAME_LENGTH;
url.dwUserNameLength=sizeof(INTERNET_MAX_USER_NAME_LENGTH);
url.lpszPassword=INTERNET_MAX_PASSWORD_LENGTH;
url.dwPasswordLength=sizeof(INTERNET_MAX_PASSWORD_LENGTH);
url.lpszUrlPath=INTERNET_MAX_PATH_LENGTH;
url.dwUrlPathLength=sizeof(INTERNET_MAX_PATH_LENGTH);
url.lpszExtraInfo=MAXBUF;
url.dwExtraInfoLength=sizeof(MAXBUF);

InternetCrackUrl("http://www.whatever.com/user/test.zip",strlen("http://www.whatever.com/user/test.zip"),ICU_DECODE,&url);
wsprintf(buffer,"%s-%s-%s",url.lpszScheme,url.lpszHostName,url.lpszUrlPath);
MessageBox(0,buffer,buffer,0);


return 0;
}

If someone sees anything wrong with the parameters, please tell me. Thanks in advance =)
Posted on 2002-08-03 10:39:09 by The Keeper
You see, it seems to me that InternetCrackUrl is a very strange API. For example, when I parse http://www.hostname.com/index.html
it gives me:
protocol = http://
host=www.hostname.com
path=/index.html
Good. But when I change the URL to www.hostname.com/index.html
it returns only empty strings.
In my programs I use this code instead:

**************************
; ###############################################################################
; ParseUrl
;
; Normalises the URL held in the global buffer szUrl and copies its last
; path component (the text after the final '/' or '\') to fileName1.
;
;   1. Looks for the first ':' in szUrl.  Unless it is followed by "//",
;      "http://" is put in front of the URL, in place (built in bBuf and
;      copied back to szUrl).
;   2. Scans backwards from the end of szUrl to the last '/' or '\' and
;      copies what follows it to fileName1.
;
; In:     szUrl     - zero-terminated URL (global)
; Out:    szUrl     - the URL, now always carrying a "scheme://" prefix
;         fileName1 - text after the last slash
; Saves:  esi
;
; NOTE(review): the prefixed URL has to fit into the 260-byte bBuf and back
; into szUrl, i.e. szUrl needs room for 7 more characters - no check is
; made here; confirm against the size of szUrl.
; NOTE: for a URL without a path ("http://host") the last slash is the
; second one of "//", so fileName1 receives the host name.
ParseUrl proc
    LOCAL bBuf[260]:BYTE
    ;LOCAL protLen:DWORD, hostLen:DWORD, pathLen:DWORD, fileLen:DWORD

    ; Detect protocol presence
    push esi
    invoke lstrlen, offset szUrl        ; eax = number of characters left to scan
    mov esi, offset szUrl
    .while (eax)
        .if byte ptr [esi] == ':'
            .break
        .endif
        dec eax
        inc esi
    .endw
    ; eax != 0 means the scan stopped on a ':' (esi points at it); the
    ; conditions are tested left to right, so [esi+2] is only read when
    ; [esi+1] is a '/'.
    .if eax!=0 && byte ptr [esi+1] == '/' && byte ptr [esi+2] == '/'
        add esi, 3                      ; esi = first character after "://"
    .else
        invoke lstrcpy, addr bBuf, CStr("http://")
        invoke lstrcat, addr bBuf, offset szUrl
        invoke lstrcpy, offset szUrl, addr bBuf
        mov esi, offset szUrl
        add esi, 7                      ; esi = first character after "http://"
    .endif
    ;invoke MessageBox, 0, esi, offset appName, 0

    ; Find file name
    invoke lstrlen, addr szUrl
    mov esi, offset szUrl
    add esi, eax                        ; esi = terminating zero of szUrl
    .while (eax)
        .if byte ptr [esi] == '/' || byte ptr [esi] == '\'
            .break
        .endif
        dec eax
        dec esi
    .endw
    inc esi                             ; step over the slash
    invoke lstrcpy, offset fileName1, esi
    pop esi

    ret
ParseUrl endp
**************************
Please note that this is a minimal version of the routine, but anybody can add further checks. E.g. if you need to know the name of the protocol, add (with esi pointing at the start of the URL)
cmp dword ptr [esi], "PTTH" ; for HTTP
cmp dword ptr [esi], ":PTF" ; for FTP
and so on.

Mike
Posted on 2002-08-04 05:25:50 by Mike