diff options
Diffstat (limited to 'win32/bin/www.pl')
-rw-r--r-- | win32/bin/www.pl | 901 |
1 files changed, 901 insertions, 0 deletions
diff --git a/win32/bin/www.pl b/win32/bin/www.pl new file mode 100644 index 0000000000..8022597454 --- /dev/null +++ b/win32/bin/www.pl @@ -0,0 +1,901 @@ +## +## Jeffrey Friedl (jfriedl@omron.co.jp) +## Copyri.... ah hell, just take it. +## +## This is "www.pl". +## Include (require) to use, execute ("perl www.pl") to print a man page. +## Requires my 'network.pl' library. +package www; +$version = "951219.9"; + +## +## 951219.9 +## -- oops, stopped sending garbage Authorization line when no +## authorization was requested. +## +## 951114.8 +## -- added support for HEAD, If-Modified-Since +## +## 951017.7 +## -- Change to allow a POST'ed HTTP text to have newlines in it. +## Added 'NewURL to the open_http_connection %info. Idea courtesy +## of Bryan Schmersal (http://www.transarc.com/~bryans/Home.html). +## +## +## 950921.6 +## -- added more robust HTTP error reporting +## (due to steven_campbell@uk.ibm.com) +## +## 950911.5 +## -- added Authorization support +## + +## +## HTTP return status codes. +## +%http_return_code = + (200,"OK", + 201,"Created", + 202,"Accepted", + 203,"Partial Information", + 204,"No Response", + 301,"Moved", + 302,"Found", + 303,"Method", + 304,"Not modified", + 400,"Bad request", + 401,"Unauthorized", + 402,"Payment required", + 403,"Forbidden", + 404,"Not found", + 500,"Internal error", + 501,"Not implemented", + 502,"Service temporarily overloaded", + 503,"Gateway timeout"); + +## +## If executed directly as a program, print as a man page. +## +if (length($0) >= 6 && substr($0, -6) eq 'www.pl') +{ + seek(DATA, 0, 0) || die "$0: can't reset internal pointer.\n"; + print "www.pl version $version\n", '=' x 60, "\n"; + while (<DATA>) { + next unless /^##>/../^##</; ## select lines to print + s/^##[<> ]?//; ## clean up + print; + } + exit(0); +} + +## +## History: +## version 950425.4 +## added require for "network.pl" +## +## version 950425.3 +## re-did from "Www.pl" which was a POS. +## +## +## BLURB: +## A group of routines for dealing with URLs, HTTP sessions, proxies, etc. +## Requires my 'network.pl' package. The library file can be executed +## directly to produce a man page. + +##> +## A motley group of routines for dealing with URLs, HTTP sessions, proxies, +## etc. Requires my 'network.pl' package. +## +## Latest version, as well as other stuff (including network.pl) available +## at http://www.wg.omron.co.jp/~jfriedl/perl/ +## +## Simpleton complete program to dump a URL given on the command-line: +## +## require 'network.pl'; ## required for www.pl +## require 'www.pl'; ## main routines +## $URL = shift; ## get URL +## ($status, $memo) = &www'open_http_url(*IN, $URL); ## connect +## die "$memo\n" if $status ne 'ok'; ## report any error +## print while <IN>; ## dump contents +## +## There are various options available for open_http_url. +## For example, adding 'quiet' to the call, i.e. vvvvvvv-----added +## ($status, $memo) = &www'open_http_url(*IN, $URL, 'quiet'); +## suppresses the normal informational messages such as "waiting for data...". +## +## The options, as well as the various other public routines in the package, +## are discussed below. +## +##< + +## +## Default port for the protocols whose URL we'll at least try to recognize. +## +%default_port = ('http', 80, + 'ftp', 21, + 'gopher', 70, + 'telnet', 23, + 'wais', 210, + ); + +## +## A "URL" to "ftp.blah.com" without a protocol specified is probably +## best reached via ftp. If the hostname begins with a protocol name, it's +## easy. But something like "www." maps to "http", so that mapping is below: +## +%name2protocol = ( + 'www', 'http', + 'wwwcgi','http', +); + +$last_message_length = 0; +$useragent = "www.pl/$version"; + +## +##> +############################################################################## +## routine: open_http_url +## +## Used as +## ($status, $memo, %info) = &www'open_http_url(*FILEHANDLE, $URL, options..) +## +## Given an unused filehandle, a URL, and a list of options, opens a socket +## to the URL and returns with the filehandle ready to read the data of the +## URL. The HTTP header, as well as other information, is returned in %info. +## +## OPTIONS are from among: +## +## "post" +## If PATH appears to be a query (i.e. has a ? in it), contact +## via a POST rather than a GET. +## +## "nofollow" +## Normally, if the initial contact indicates that the URL has moved +## to a different location, the new location is automatically contacted. +## "nofollow" inhibits this. +## +## "noproxy" +## Normally, a proxy will be used if 'http_proxy' is defined in the +## environment. This option inhibits the use of a proxy. +## +## "retry" +## If a host's address can't be found, it may well be because the +## nslookup just didn't return in time and that retrying the lookup +## after a few seconds will succeed. If this option is given, will +## wait five seconds and try again. May be given multiple times to +## retry multiple times. +## +## "quiet" +## Informational messages will be suppressed. +## +## "debug" +## Additional messages will be printed. +## +## "head" +## Requests only the file header to be sent +## +## +## +## +## The return array is ($STATUS, $MEMO, %INFO). +## +## STATUS is 'ok', 'error', 'status', or 'follow' +## +## If 'error', the MEMO will indicate why (URL was not http, can't +## connect, etc.). INFO is probably empty, but may have some data. +## See below. +## +## If 'status', the connnection was made but the reply was not a normal +## "OK" successful reply (i.e. "Not found", etc.). MEMO is a note. +## INFO is filled as noted below. Filehandle is ready to read (unless +## $info{'BODY'} is filled -- see below), but probably most useful +## to treat this as an 'error' response. +## +## If 'follow', MEMO is the new URL (for when 'nofollow' was used to +## turn off automatic following) and INFO is filled as described +## below. Unless you wish to give special treatment to these types of +## responses, you can just treat 'follow' responses like 'ok' +## responses. +## +## If 'ok', the connection went well and the filehandle is ready to +## read. +## +## INFO contains data as described at the read_http_header() function (in +## short, the HTTP response header) and additional informational fields. +## In addition, the following fields are filled in which describe the raw +## connection made or attempted: +## +## PROTOCOL, HOST, PORT, PATH +## +## Note that if a proxy is being used, these will describe the proxy. +## The field TARGET will describe the host or host:port ultimately being +## contacted. When no proxy is being used, this will be the same info as +## in the raw connection fields above. However, if a proxy is being used, +## it will refer to the final target. +## +## In some cases, the additional entry $info{'BODY'} exists as well. If +## the result-code indicates an error, the body of the message may be +## parsed for internal reasons (i.e. to support 'repeat'), and if so, it +## will be saved in $info{'BODY}. +## +## If the URL has moved, $info{'NewURL'} will exist and contain the new +## URL. This will be true even if the 'nofollow' option is specified. +## +##< +## +sub open_http_url +{ + local(*HTTP, $URL, @options) = @_; + return &open_http_connection(*HTTP, $URL, undef, undef, undef, @options); +} + + +## +##> +############################################################################## +## routine: read_http_header +## +## Given a filehandle to a just-opened HTTP socket connection (such as one +## created via &network'connect_to which has had the HTTP request sent), +## reads the HTTP header and and returns the parsed info. +## +## ($replycode, %info) = &read_http_header(*FILEHANDLE); +## +## $replycode will be the HTTP reply code as described below, or +## zero on header-read error. +## +## %info contains two types of fields: +## +## Upper-case fields are informational from the function. +## Lower-case fields are the header field/value pairs. +## +## Upper-case fields: +## +## $info{'STATUS'} will be the first line read (HTTP status line) +## +## $info{'CODE'} will be the numeric HTTP reply code from that line. +## This is also returned as $replycode. +## +## $info{'TYPE'} is the text from the status line that follows CODE. +## +## $info{'HEADER'} will be the raw text of the header (sans status line), +## newlines and all. +## +## $info{'UNKNOWN'}, if defined, will be any header lines not in the +## field/value format used to fill the lower-case fields of %info. +## +## Lower-case fields are reply-dependent, but in general are described +## in http://info.cern.ch/hypertext/WWW/Protocols/HTTP/Object_Headers.html +## +## A header line such as +## Content-type: Text/Plain +## will appear as $info{'content-type'} = 'Text/Plain'; +## +## (*) Note that while the field names are are lower-cased, the field +## values are left as-is. +## +## +## When $replycode is zero, there are two possibilities: +## $info{'TYPE'} is 'empty' +## No response was received from the filehandle before it was closed. +## No other %info fields present. +## $info{'TYPE'} is 'unknown' +## First line of the response doesn't seem to be proper HTTP. +## $info{'STATUS'} holds that line. No other %info fields present. +## +## The $replycode, when not zero, is as described at +## http://info.cern.ch/hypertext/WWW/Protocols/HTTP/HTRESP.html +## +## Some of the codes: +## +## success 2xx +## ok 200 +## created 201 +## accepted 202 +## partial information 203 +## no response 204 +## redirection 3xx +## moved 301 +## found 302 +## method 303 +## not modified 304 +## error 4xx, 5xx +## bad request 400 +## unauthorized 401 +## paymentrequired 402 +## forbidden 403 +## not found 404 +## internal error 500 +## not implemented 501 +## service temporarily overloaded 502 +## gateway timeout 503 +## +##< +## +sub read_http_header +{ + local(*HTTP) = @_; + local(%info, $_); + + ## + ## The first line of the response will be the status (OK, error, etc.) + ## + unless (defined($info{'STATUS'} = <HTTP>)) { + $info{'TYPE'} = "empty"; + return (0, %info); + } + chop $info{'STATUS'}; + + ## + ## Check the status line. If it doesn't match and we don't know the + ## format, we'll just let it pass and hope for the best. + ## + unless ($info{'STATUS'} =~ m/^HTTP\S+\s+(\d\d\d)\s+(.*\S)/i) { + $info{'TYPE'} = 'unknown'; + return (0, %info); + } + + $info{'CODE'} = $1; + $info{'TYPE'} = $2; + $info{'HEADER'} = ''; + + ## read the rest of the header. + while (<HTTP>) { + last if m/^\s*$/; + $info{'HEADER'} .= $_; ## save whole text of header. + + if (m/^([^\n:]+):[ \t]*(.*\S)/) { + local($field, $value) = ("\L$1", $2); + if (defined $info{$field}) { + $info{$field} .= "\n" . $value; + } else { + $info{$field} = $value; + } + } elsif (defined $info{'UNKNOWN'}) { + $info{'UNKNOWN'} .= $_; + } else { + $info{'UNKNOWN'} = $_; + } + } + + return ($info{'CODE'}, %info); +} + +## +##> +## +############################################################################## +## routine: grok_URL(URL, noproxy, defaultprotocol) +## +## Given a URL, returns access information. Deals with +## http, wais, gopher, ftp, and telnet +## URLs. +## +## Information returned is +## (PROTOCOL, HOST, PORT, PATH, TARGET, USER, PASSWORD) +## +## If noproxy is not given (or false) and there is a proxy defined +## for the given protocol (via the "*_proxy" environmental variable), +## the returned access information will be for the proxy and will +## reference the given URL. In this case, 'TARGET' will be the +## HOST:PORT of the original URL (PORT elided if it's the default port). +## +## Access information returned: +## PROTOCOL: "http", "ftp", etc. (guaranteed to be lowercase). +## HOST: hostname or address as given. +## PORT: port to access +## PATH: path of resource on HOST:PORT. +## TARGET: (see above) +## USER and PASSWORD: for 'ftp' and 'telnet' URLs, if supplied by the +## URL these will be defined, undefined otherwise. +## +## If no protocol is defined via the URL, the defaultprotocol will be used +## if given. Otherwise, the URL's address will be checked for a leading +## protocol name (as with a leading "www.") and if found will be used. +## Otherwise, the protocol defaults to http. +## +## Fills in the appropriate default port for the protocol if need be. +## +## A proxy is defined by a per-protocol environmental variable such +## as http_proxy. For example, you might have +## setenv http_proxy http://firewall:8080/ +## setenv ftp_proxy $http_proxy +## to set it up. +## +## A URL seems to be officially described at +## http://www.w3.org/hypertext/WWW/Addressing/URL/5_BNF.html +## although that document is a joke of errors. +## +##< +## +sub grok_URL +{ + local($_, $noproxy, $defaultprotocol) = @_; + $noproxy = defined($noproxy) && $noproxy; + + ## Items to be filled in and returned. + local($protocol, $address, $port, $path, $target, $user, $password); + + return undef unless m%^(([a-zA-Z]+)://|/*)([^/]+)(/.*)?$%; + + ## + ## Due to a bug in some versions of perl5, $2 might not be empty + ## even if $1 is. Therefore, we must check $1 for a : to see if the + ## protocol stuff matched or not. If not, the protocol is undefined. + ## + ($protocol, $address, $path) = ((index($1,":") >= 0 ? $2 : undef), $3, $4); + + if (!defined $protocol) + { + ## + ## Choose a default protocol if none given. If address begins with + ## a protocol name (one that we know via %name2protocol or + ## %default_port), choose it. Otherwise, choose http. + ## + if (defined $defaultprotocol) { + $protocol = $defaultprotocol; + } + else + { + $address =~ m/^[a-zA-Z]+/; + if (defined($name2protocol{"\L$&"})) { + $protocol = $name2protocol{"\L$&"}; + } else { + $protocol = defined($default_port{"\L$&"}) ? $& : 'http'; + } + } + } + $protocol =~ tr/A-Z/a-z/; ## ensure lower-case. + + ## + ## Http support here probably not kosher, but fits in nice for basic + ## authorization. + ## + if ($protocol eq 'ftp' || $protocol eq 'telnet' || $protocol eq 'http') + { + ## Glean a username and password from address, if there. + ## There if address starts with USER[:PASSWORD]@ + if ($address =~ s/^(([^\@:]+)(:([^@]+))?\@)//) { + ($user, $password) = ($2, $4); + } + } + + ## + ## address left is (HOSTNAME|HOSTNUM)[:PORTNUM] + ## + if ($address =~ s/:(\d+)$//) { + $port = $1; + } else { + $port = $default_port{$protocol}; + } + + ## default path is '/'; + $path = '/' if !defined $path; + + ## + ## If there's a proxy and we're to proxy this request, do so. + ## + local($proxy) = $ENV{$protocol."_proxy"}; + if (!$noproxy && defined($proxy) && !&no_proxy($protocol,$address)) + { + local($dummy); + local($old_pass, $old_user); + + ## + ## Since we're going through a proxy, we want to send the + ## proxy the entire URL that we want. However, when we're + ## doing Authenticated HTTP, we need to take out the user:password + ## that webget has encoded in the URL (this is a bit sleazy on + ## the part of webget, but the alternative is to have flags, and + ## having them part of the URL like with FTP, etc., seems a bit + ## cleaner to me in the context of how webget is used). + ## + ## So, if we're doing this slezy thing, we need to construct + ## the new URL from the compnents we have now (leaving out password + ## and user), decode the proxy URL, then return the info for + ## that host, a "filename" of the entire URL we really want, and + ## the user/password from the original URL. + ## + ## For all other things, we can just take the original URL, + ## ensure it has a protocol on it, and pass it as the "filename" + ## we want to the proxy host. The difference between reconstructing + ## the URL (as for HTTP Authentication) and just ensuring the + ## protocol is there is, except for the user/password stuff, + ## nothing. In theory, at least. + ## + if ($protocol eq 'http' && (defined($password) || defined($user))) + { + $path = "http://$address$path"; + $old_pass = $password; + $old_user = $user; + } else { + ## Re-get original URL and ensure protocol// actually there. + ## This will become our new path. + ($path = $_) =~ s,^($protocol:)?/*,$protocol://,i; + } + + ## note what the target will be + $target = ($port==$default_port{$protocol})?$address:"$address:$port"; + + ## get proxy info, discarding + ($protocol, $address, $port, $dummy, $dummy, $user, $password) + = &grok_URL($proxy, 1); + $password = $old_pass if defined $old_pass; + $user = $old_user if defined $old_user; + } + ($protocol, $address, $port, $path, $target, $user, $password); +} + + + +## +## &no_proxy($protocol, $host) +## +## Returns true if the specified host is identified in the no_proxy +## environmental variable, or identify the proxy server itself. +## +sub no_proxy +{ + local($protocol, $targethost) = @_; + local(@dests, $dest, $host, @hosts, $aliases); + local($proxy) = $ENV{$protocol."_proxy"}; + return 0 if !defined $proxy; + $targethost =~ tr/A-Z/a-z/; ## ensure all lowercase; + + @dests = ($proxy); + push(@dests,split(/\s*,\s*/,$ENV{'no_proxy'})) if defined $ENV{'no_proxy'}; + + foreach $dest (@dests) + { + ## just get the hostname + $host = (&grok_URL($dest, 1), 'http')[1]; + + if (!defined $host) { + warn "can't grok [$dest] from no_proxy env.var.\n"; + next; + } + @hosts = ($host); ## throw in original name just to make sure + ($host, $aliases) = (gethostbyname($host))[0, 1]; + + if (defined $aliases) { + push(@hosts, ($host, split(/\s+/, $aliases))); + } else { + push(@hosts, $host); + } + foreach $host (@hosts) { + next if !defined $host; + return 1 if "\L$host" eq $targethost; + } + } + return 0; +} + +sub ensure_proper_network_library +{ + require 'network.pl' if !defined $network'version; + warn "WARNING:\n". __FILE__ . + qq/ needs a newer version of "network.pl"\n/ if + !defined($network'version) || $network'version < "950311.5"; +} + + + +## +##> +############################################################################## +## open_http_connection(*FILEHANDLE, HOST, PORT, PATH, TARGET, OPTIONS...) +## +## Opens an HTTP connection to HOST:PORT and requests PATH. +## TARGET is used only for informational messages to the user. +## +## If PORT and PATH are undefined, HOST is taken as an http URL and TARGET +## is filled in as needed. +## +## Otherwise, it's the same as open_http_url (including return value, etc.). +##< +## +sub open_http_connection +{ + local(*HTTP, $host, $port, $path, $target, @options) = @_; + local($post_text, @error, %seen); + local(%info); + + &ensure_proper_network_library; + + ## options allowed: + local($post, $retry, $authorization, $nofollow, $noproxy, + $head, $debug, $ifmodifiedsince, $quiet, ) = (0) x 10; + ## parse options: + foreach $opt (@options) + { + next unless defined($opt) && $opt ne ''; + local($var, $val); + if ($opt =~ m/^(\w+)=(.*)/) { + ($var, $val) = ($1, $2); + } else { + $var = $opt; + $val = 1; + } + $var =~ tr/A-Z/a-z/; ## ensure variable is lowercase. + local(@error); + + eval "if (defined \$$var) { \$$var = \$val; } else { \@error = + ('error', 'bad open_http_connection option [$opt]'); }"; + return ('error', "open_http_connection eval: $@") if $@; + return @error if defined @error; + } + $quiet = 0 if $debug; ## debug overrides quiet + + local($protocol, $error, $code, $URL, %info, $tmp, $aite); + + ## + ## if both PORT and PATH are undefined, treat HOST as a URL. + ## + unless (defined($port) && defined($path)) + { + ($protocol,$host,$port,$path,$target)=&grok_URL($host,$noproxy,'http'); + if ($protocol ne "http") { + return ('error',"open_http_connection doesn't grok [$protocol]"); + } + unless (defined($host)) { + return ('error', "can't grok [$URL]"); + } + } + + return ('error', "no port in URL [$URL]") unless defined $port; + return ('error', "no path in URL [$URL]") unless defined $path; + + RETRY: while(1) + { + ## we'll want $URL around for error messages and such. + if ($port == $default_port{'http'}) { + $URL = "http://$host"; + } else { + $URL = "http://$host:$default_port{'http'}"; + } + $URL .= ord($path) eq ord('/') ? $path : "/$path"; + + $aite = defined($target) ? "$target via $host" : $host; + + &message($debug, "connecting to $aite ...") unless $quiet; + + ## + ## note some info that might be of use to the caller. + ## + local(%preinfo) = ( + 'PROTOCOL', 'http', + 'HOST', $host, + 'PORT', $port, + 'PATH', $path, + ); + if (defined $target) { + $preinfo{'TARGET'} = $target; + } elsif ($default_port{'http'} == $port) { + $preinfo{'TARGET'} = $host; + } else { + $preinfo{'TARGET'} = "$host:$port"; + } + + ## connect to the site + $error = &network'connect_to(*HTTP, $host, $port); + if (defined $error) { + return('error', "can't connect to $aite: $error", %preinfo); + } + + ## If we're asked to POST and it looks like a POST, note post text. + if ($post && $path =~ m/\?/) { + $post_text = $'; ## everything after the '?' + $path = $`; ## everything before the '?' + } + + ## send the POST or GET request + $tmp = $head ? 'HEAD' : (defined $post_text ? 'POST' : 'GET'); + + &message($debug, "sending request to $aite ...") if !$quiet; + print HTTP $tmp, " $path HTTP/1.0\n"; + + ## send the If-Modified-Since field if needed. + if ($ifmodifiedsince) { + print HTTP "If-Modified-Since: $ifmodifiedsince\n"; + } + + ## oh, let's sputter a few platitudes..... + print HTTP "Accept: */*\n"; + print HTTP "User-Agent: $useragent\n" if defined $useragent; + + ## If doing Authorization, do so now. + if ($authorization) { + print HTTP "Authorization: Basic ", + &htuu_encode($authorization), "\n"; + } + + ## If it's a post, send it. + if (defined $post_text) + { + print HTTP "Content-type: application/x-www-form-urlencoded\n"; + print HTTP "Content-length: ", length $post_text, "\n\n"; + print HTTP $post_text, "\n"; + } + print HTTP "\n"; + &message($debug, "waiting for data from $aite ...") unless $quiet; + + ## we can now read the response (header, then body) via HTTP. + binmode(HTTP); ## just in case. + + ($code, %info) = &read_http_header(*HTTP); + &message(1, "header returns code $code ($info{'TYPE'})") if $debug; + + ## fill in info from %preinfo + local($val, $key); + while (($val, $key) = each %preinfo) { + $info{$val} = $key; + } + + if ($code == 0) + { + return('error',"empty response for $URL") + if $info{'TYPE'} eq 'empty'; + return('error', "non-HTTP response for $URL", %info) + if $info{'TYPE'} eq 'unknown'; + return('error', "unknown zero-code for $URL", %info); + } + + if ($code == 302) ## 302 is magic for "Found" + { + if (!defined $info{'location'}) { + return('error', "No location info for Found URL $URL", %info); + } + local($newURL) = $info{'location'}; + + ## Remove :80 from hostname, if there. Looks ugly. + $newURL =~ s,^(http:/+[^/:]+):80/,$1/,i; + $info{"NewURL"} = $newURL; + + ## if we're not following links or if it's not to HTTP, return. + return('follow', $newURL, %info) if + $nofollow || $newURL!~m/^http:/i; + + ## note that we've seen this current URL. + $seen{$host, $port, $path} = 1; + + &message(1, qq/[note: now moved to "$newURL"]/) unless $quiet; + + + ## get the new one and return an error if it's been seen. + ($protocol, $host, $port, $path, $target) = + &www'grok_URL($newURL, $noproxy); + &message(1, "[$protocol][$host][$port][$path]") if $debug; + + if (defined $seen{$host, $port, $path}) + { + return('error', "circular reference among:\n ". + join("\n ", sort grep(/^http/i, keys %seen)), %seen); + } + next RETRY; + } + elsif ($code == 500) ## 500 is magic for "internal error" + { + ## + ## A proxy will often return this with text saying "can't find + ## host" when in reality it's just because the nslookup returned + ## null at the time. Such a thing should be retied again after a + ## few seconds. + ## + if ($retry) + { + local($_) = $info{'BODY'} = join('', <HTTP>); + if (/Can't locate remote host:\s*(\S+)/i) { + local($times) = ($retry == 1) ? + "once more" : "up to $retry more times"; + &message(0, "can't locate $1, will try $times ...") + unless $quiet; + sleep(5); + $retry--; + next RETRY; + } + } + } + + if ($code != 200) ## 200 is magic for "OK"; + { + ## I'll deal with these as I see them..... + &clear_message; + if ($info{'TYPE'} eq '') + { + if (defined $http_return_code{$code}) { + $info{'TYPE'} = $http_return_code{$code}; + } else { + $info{'TYPE'} = "(unknown status code $code)"; + } + } + return ('status', $info{'TYPE'}, %info); + } + + &clear_message; + return ('ok', 'ok', %info); + } +} + + +## +## Hyper Text UUencode. Somewhat different from regular uuencode. +## +## Logic taken from Mosaic for X code by Mark Riordan and Ari Luotonen. +## +sub htuu_encode +{ + local(@in) = unpack("C*", $_[0]); + local(@out); + + push(@in, 0, 0); ## in case we need to round off an odd byte or two + while (@in >= 3) { + ## + ## From the next three input bytes, + ## construct four encoded output bytes. + ## + push(@out, $in[0] >> 2); + push(@out, (($in[0] << 4) & 060) | (($in[1] >> 4) & 017)); + push(@out, (($in[1] << 2) & 074) | (($in[2] >> 6) & 003)); + push(@out, $in[2] & 077); + splice(@in, 0, 3); ## remove these three + } + + ## + ## @out elements are now indices to the string below. Convert to + ## the appropriate actual text. + ## + foreach $new (@out) { + $new = substr( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", + $new, 1); + } + + if (@in == 2) { + ## the two left over are the two extra nulls, so we encoded the proper + ## amount as-is. + } elsif (@in == 1) { + ## We encoded one extra null too many. Undo it. + $out[$#out] = '='; + } else { + ## We must have encoded two nulls... Undo both. + $out[$#out ] = '='; + $out[$#out -1] = '='; + } + + join('', @out); +} + +## +## This message stuff really shouldn't be here, but in some seperate library. +## Sorry. +## +## Called as &message(SAVE, TEXT ....), it shoves the text to the screen. +## If SAVE is true, bumps the text out as a printed line. Otherwise, +## will shove out without a newline so that the next message overwrites it, +## or it is clearded via &clear_message(). +## +sub message +{ + local($nl) = shift; + die "oops $nl." unless $nl =~ m/^\d+$/; + local($text) = join('', @_); + local($NL) = $nl ? "\n" : "\r"; + $thislength = length($text); + if ($thislength >= $last_message_length) { + print STDERR $text, $NL; + } else { + print STDERR $text, ' 'x ($last_message_length-$thislength), $NL; + } + $last_message_length = $nl ? 0 : $thislength; +} + +sub clear_message +{ + if ($last_message_length) { + print STDERR ' ' x $last_message_length, "\r"; + $last_message_length = 0; + } +} + +1; +__END__ |