summaryrefslogtreecommitdiff
path: root/support/logresolve.pl.in
diff options
context:
space:
mode:
authorKen Coar <coar@apache.org>2001-03-11 23:40:47 +0000
committerKen Coar <coar@apache.org>2001-03-11 23:40:47 +0000
commit0108291673ab89bd2765828744f12cbccf361773 (patch)
treef294f4bb369b2430883f74c45fd08b827923a42e /support/logresolve.pl.in
parente49204885521e1b6d2b6e77ab0260e6593f72b5e (diff)
downloadhttpd-0108291673ab89bd2765828744f12cbccf361773.tar.gz
Bring the other Perl scripts into the autoconf-edited
find-the-interpreter fold, like apxs and apachectl. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@88495 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'support/logresolve.pl.in')
-rw-r--r--support/logresolve.pl.in261
1 files changed, 261 insertions, 0 deletions
diff --git a/support/logresolve.pl.in b/support/logresolve.pl.in
new file mode 100644
index 0000000000..c2142f161d
--- /dev/null
+++ b/support/logresolve.pl.in
@@ -0,0 +1,261 @@
+#!@perlbin@
+# ====================================================================
+# The Apache Software License, Version 1.1
+#
+# Copyright (c) 2000-2001 The Apache Software Foundation. All rights
+# reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# 3. The end-user documentation included with the redistribution,
+# if any, must include the following acknowledgment:
+# "This product includes software developed by the
+# Apache Software Foundation (http://www.apache.org/)."
+# Alternately, this acknowledgment may appear in the software itself,
+# if and wherever such third-party acknowledgments normally appear.
+#
+# 4. The names "Apache" and "Apache Software Foundation" must
+# not be used to endorse or promote products derived from this
+# software without prior written permission. For written
+# permission, please contact apache@apache.org.
+#
+# 5. Products derived from this software may not be called "Apache",
+# nor may "Apache" appear in their name, without prior written
+# permission of the Apache Software Foundation.
+#
+# THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+# ====================================================================
+#
+# This software consists of voluntary contributions made by many
+# individuals on behalf of the Apache Software Foundation. For more
+# information on the Apache Software Foundation, please see
+# <http://www.apache.org/>.
+#
+# logresolve.pl
+#
+# v 1.2 by robh @ imdb.com
+#
+# usage: logresolve.pl <infile >outfile
+#
+# input = Apache/NCSA/.. logfile with IP numbers at start of lines
+# output = same logfile with IP addresses resolved to hostnames where
+# name lookups succeeded.
+#
+# this differs from the C based 'logresolve' in that this script
+# spawns a number ($CHILDREN) of subprocesses to resolve addresses
+# concurrently and sets a short timeout ($TIMEOUT) for each lookup in
+# order to keep things moving quickly.
+#
+# the parent process handles caching of IP->hostnames using a Perl hash
+# it also avoids sending the same IP to multiple child processes to be
+# resolved multiple times concurrently.
+#
+# Depending on the settings of $CHILDREN and $TIMEOUT you should see
+# significant reductions in the overall time taken to resolve your
+# logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
+# logfile lines processed per hour compared to ~45,000 per hour
+# with 'logresolve'.
+#
+# I haven't yet seen any noticable reduction in the percentage of IPs
+# that fail to get resolved. Your mileage will no doubt vary. 5s is long
+# enough to wait IMO.
+#
+# Known to work with FreeBSD 2.2
+# Known to have problems with Solaris
+#
+# 980417 - use 'sockaddr_un' for bind/connect to make the script work
+# with linux. Fix from Luuk de Boer <luuk_de_boer@pi.net>
+
+require 5.004;
+
+$|=1;
+
+use FileHandle;
+use Socket;
+
+use strict;
+no strict 'refs';
+
+use vars qw($PROTOCOL);
+$PROTOCOL = 0;
+
+my $CHILDREN = 40;
+my $TIMEOUT = 5;
+
+my $filename;
+my %hash = ();
+my $parent = $$;
+
+my @children = ();
+for (my $child = 1; $child <=$CHILDREN; $child++) {
+ my $f = fork();
+ if (!$f) {
+ $filename = "./.socket.$parent.$child";
+ if (-e $filename) { unlink($filename) || warn "$filename .. $!\n";}
+ &child($child);
+ exit(0);
+ }
+ push(@children, $f);
+}
+
+&parent;
+&cleanup;
+
+## remove all temporary files before shutting down
+sub cleanup {
+ # die kiddies, die
+ kill(15, @children);
+ for (my $child = 1; $child <=$CHILDREN; $child++) {
+ if (-e "./.socket.$parent.$child") {
+ unlink("./.socket.$parent.$child")
+ || warn ".socket.$parent.$child $!";
+ }
+ }
+}
+
+sub parent {
+ # Trap some possible signals to trigger temp file cleanup
+ $SIG{'KILL'} = $SIG{'INT'} = $SIG{'PIPE'} = \&cleanup;
+
+ my %CHILDSOCK;
+ my $filename;
+
+ ## fork child processes. Each child will create a socket connection
+ ## to this parent and use an unique temp filename to do so.
+ for (my $child = 1; $child <=$CHILDREN; $child++) {
+ $CHILDSOCK{$child}= FileHandle->new;
+
+ if (!socket($CHILDSOCK{$child}, AF_UNIX, SOCK_STREAM, $PROTOCOL)) {
+ warn "parent socket to child failed $!";
+ }
+ $filename = "./.socket.$parent.$child";
+ my $response;
+ do {
+ $response = connect($CHILDSOCK{$child}, sockaddr_un($filename));
+ if ($response != 1) {
+ sleep(1);
+ }
+ } while ($response != 1);
+ $CHILDSOCK{$child}->autoflush;
+ }
+ ## All child processes should now be ready or at worst warming up
+
+ my (@buffer, $child, $ip, $rest, $hostname, $response);
+ ## read the logfile lines from STDIN
+ while(<STDIN>) {
+ @buffer = (); # empty the logfile line buffer array.
+ $child = 1; # children are numbered 1..N, start with #1
+
+ # while we have a child to talk to and data to give it..
+ do {
+ push(@buffer, $_); # buffer the line
+ ($ip, $rest) = split(/ /, $_, 2); # separate IP form rest
+
+ unless ($hash{$ip}) { # resolve if unseen IP
+ $CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child
+ $hash{$ip} = $ip; # don't look it up again.
+ $child++;
+ }
+ } while (($child < ($CHILDREN-1)) and ($_ = <STDIN>));
+
+ ## now poll each child for a response
+ while (--$child > 0) {
+ $response = $CHILDSOCK{$child}->getline;
+ chomp($response);
+ # child sends us back both the IP and HOSTNAME, no need for us
+ # to remember what child received any given IP, and no worries
+ # what order we talk to the children
+ ($ip, $hostname) = split(/\|/, $response, 2);
+ $hash{$ip} = $hostname;
+ }
+
+ # resolve all the logfiles lines held in the log buffer array..
+ for (my $line = 0; $line <=$#buffer; $line++) {
+ # get next buffered line
+ ($ip, $rest) = split(/ /, $buffer[$line], 2);
+ # separate IP from rest and replace with cached hostname
+ printf STDOUT ("%s %s", $hash{$ip}, $rest);
+ }
+ }
+}
+
+########################################
+
+sub child {
+ # arg = numeric ID - how the parent refers to me
+ my $me = shift;
+
+ # add trap for alarm signals.
+ $SIG{'ALRM'} = sub { die "alarmed"; };
+
+ # create a socket to communicate with parent
+ socket(INBOUND, AF_UNIX, SOCK_STREAM, $PROTOCOL)
+ || die "Error with Socket: !$\n";
+ $filename = "./.socket.$parent.$me";
+ bind(INBOUND, sockaddr_un($filename))
+ || die "Error Binding $filename: $!\n";
+ listen(INBOUND, 5) || die "Error Listening: $!\n";
+
+ my ($ip, $send_back);
+ my $talk = FileHandle->new;
+
+ # accept a connection from the parent process. We only ever have
+ # have one connection where we exchange 1 line of info with the
+ # parent.. 1 line in (IP address), 1 line out (IP + hostname).
+ accept($talk, INBOUND) || die "Error Accepting: $!\n";
+ # disable I/O buffering just in case
+ $talk->autoflush;
+ # while the parent keeps sending data, we keep responding..
+ while(($ip = $talk->getline)) {
+ chomp($ip);
+ # resolve the IP if time permits and send back what we found..
+ $send_back = sprintf("%s|%s", $ip, &nslookup($ip));
+ $talk->print($send_back."\n");
+ }
+}
+
+# perform a time restricted hostname lookup.
+sub nslookup {
+ # get the IP as an arg
+ my $ip = shift;
+ my $hostname = undef;
+
+ # do the hostname lookup inside an eval. The eval will use the
+ # already configured SIGnal handler and drop out of the {} block
+ # regardless of whether the alarm occured or not.
+ eval {
+ alarm($TIMEOUT);
+ $hostname = gethostbyaddr(gethostbyname($ip), AF_INET);
+ alarm(0);
+ };
+ if ($@ =~ /alarm/) {
+ # useful for debugging perhaps..
+ # print "alarming, isn't it? ($ip)";
+ }
+
+ # return the hostname or the IP address itself if there is no hostname
+ $hostname ne "" ? $hostname : $ip;
+}
+
+