/*_____________________________________________________________________________
|/
/  URL add-on by David "Saturn" van Moolenbroek
|  Version 0.1, released 12-01-2005 -- support on #help.script/Quakenet
|  Use/modify however you want, but please keep my name in it. Thank you!
|
|  Keywords: URL parsing, regex, tokens
|
|  This add-on provides a set of identifiers that can be useful to scripts
|  that deal with URLs and paths. See below for the list of identifiers.
|
|  The script defines the following commands/identifiers:
|
|  $urlparse(url)
|
|    Properties: proto, user, pass, host, port, path
|
|    Takes a URL in the form [proto://][user[:pass]@]host[:port][/path] and
|    returns a portion of it by means of the properties named after the parts
|    in the URL, or $true if the input really was a URL in that format and
|    $false otherwise. The proto, port and path properties return a default
|    of (respectively) http, 80 and /, if the corresponding fields aren't
|    present in the given URL.
|
|  $urlport(protocol)
|
|    Returns the default port for the given protocol. If the protocol is
|    unknown, zero is returned. This identifier does not use the standard
|    "services" file to map URL protocols to ports; the protocols for which
|    a port number is returned are: http, ftp, telnet, nntp, https, gopher.
|
|  $urlencode(string)
|
|    Encodes the input by converting spaces and all unprintable characters
|    into the form %XX where XX represents the hexadecimal value of those
|    characters. See RFC 1738 for details.
|
|  $urldecode(string)
|
|    Decodes the input by replacing all substrings in the form %XX where XX
|    is a hexadecimal value, with the character of that value. See RFC 1738
|    for details. Note that all non-null characters are preserved (including
|    any newline characters for example), but double spaces are lost.
|
|  $urlpath(path)
|
|    Returns a normalized path, that is, a path that does not contain "//",
|    "." and ".." components.
|    The resulting path always starts with a slash,
|    and ends with a slash if the last component of the path is certainly not
|    a file. If the path goes up beyond the root level, $null is returned.
|
|  Simple example
|
|    var %url = http://www.xise.nl/mirc/
|    echo -a Host: $urlparse(%url).host $+ , path: $urlparse(%url).path
\
_\_____________________________________________________________________________
*/

; $urlparse(url)[.proto / .user / .pass / .host / .port / .path]
;
; Matches the input against [proto://][user[:pass]@]host[:port][/path].
; Without a property: returns $true on a match and $false otherwise.
; With a property: returns that part of the URL, or nothing at all when the
; input does not match. proto defaults to http, port defaults to the default
; port of the protocol (see $urlport) and path defaults to /.
alias urlparse {
  ; NOTE(review): the empty "()" alternatives in the pattern look intended to
  ; keep every field at a fixed $regml position (1 = proto ... 6 = path) even
  ; when an optional part is absent - confirm before touching the pattern.
  if ($regex($1-,/^(?:([a-z.+-]+)://|())(?:([^@:]+)(?::(.*?)|())@|()())([a-z0-9.-]+)(?::(\d+)|())(/.*)?$/i)) {
    if ($prop == proto) return $iif($regml(1) != $null,$ifmatch,http)
    elseif ($prop == user) return $regml(2)
    elseif ($prop == pass) return $regml(3)
    elseif ($prop == host) return $regml(4)
    ; No explicit port: fall back to the default port of the given (or default) protocol.
    elseif ($prop == port) return $iif($regml(5) != $null,$ifmatch,$urlport($iif($regml(1) != $null,$ifmatch,http)))
    elseif ($prop == path) return $iif($regml(6) != $null,$ifmatch,/)
    else return $true
  }
  ; Not a URL: $false for the plain form, nothing for any property.
  else return $iif(!$prop,$false)
}

; $urlport(protocol)
;
; Returns the default port for http, ftp, telnet, nntp, https or gopher,
; and 0 for any other protocol. The two token lists are positional: the
; Nth protocol maps to the Nth port.
alias urlport {
  return $iif($findtok(http ftp telnet nntp https gopher,$1,32),$gettok(80 21 23 119 443 70,$ifmatch,32),0)
}

; $urlencode(string)
;
; Returns the input with every character whose code is <= 32 or >= 127
; replaced by %XX (two hex digits). All other characters are copied as-is;
; note that this includes the percent sign itself.
alias urlencode {
  ; %r is declared without a value on purpose: mIRC has no string quoting, so
  ; an initializer of "" would store two literal quote characters in it.
  var %s = $1-, %r, %c, %i = 1, %n = $len($1-)
  ; Scan by index so the input is never re-assigned while it is being read.
  ; NOTE(review): assigning a remainder that starts with a space to a variable
  ; may trim that space on some mIRC versions - confirm; the index scan does
  ; not depend on it either way.
  while (%i <= %n) {
    %c = $asc($mid(%s,%i,1))
    %r = %r $+ $iif((%c <= 32) || (%c >= 127),% $+ $base(%c,10,16,2),$chr(%c))
    inc %i
  }
  return %r
}

; $urldecode(string)
;
; Returns the input with "+" and "%20" turned into spaces and every other
; %XX sequence replaced by the character with that hexadecimal code.
alias urldecode {
  ; Spaces are substituted up front, before the %XX loop runs.
  var %t = $replace($1-,+,$chr(32),% $+ 20,$chr(32))
  ; Declared empty; an initializer of "" would be stored as two literal quotes.
  var %r
  while ($regex(%t,/%([0-9A-F]{2})/i)) {
    ; Cut the text up to and including the first %XX out of %t. The cut-off
    ; prefix and the hex digits stay available through $regml(1) and $regml(2).
    ; $regsub is called only for that side effect, so its result is discarded.
    .echo -q $regsub(%t,/(.*?)%([0-9A-F]{2})/i,,%t)
    ; Decoded characters go to %r, never back into %t, so they are not
    ; decoded a second time.
    %r = %r $+ $regml(1) $+ $chr($base($regml(2),16,10))
  }
  return %r $+ %t
}

; $urlpath(path)
;
; Returns the path with empty, "." and ".." components resolved. The result
; always starts with a slash. Returns $null when ".." would go above the root.
alias urlpath {
  ; %t remembers whether the input ended with a slash.
  ; %r is declared empty; an initializer of "" would be stored as two literal
  ; quotes and would also count as a path component in the root check below.
  var %t = $iif($right($1-,1) == /,/), %r, %i = 0
  while (%i < $numtok($1-,47)) {
    inc %i
    ; "." adds no component, but marks the path so far as a directory.
    if ($gettok($1-,%i,47) == .) %r = %r $+ $iif($right(%r,1) != /,/)
    ; Regular component: append it ($ifmatch is the token just compared).
    elseif ($gettok($1-,%i,47) != ..) %r = $instok(%r,$ifmatch,0,47)
    else {
      ; ".." with nothing left to remove would go above the root.
      if (!$numtok(%r,47)) return $null
      %r = $deltok(%r,-1,47) $+ /
    }
  }
  ; Prefix the root slash unless %r already is "/", and restore the trailing
  ; slash of the input when the result does not already end with one.
  return $iif(%r != /,/) $+ %r $+ $iif((%r != $null) && ($right(%r,1) != /),%t)
}