summary refs log tree commit diff
path: root/support/logresolve.c
diff options
context:
space:
mode:
author Colm MacCarthaigh <colm@apache.org> 2005-10-24 01:52:56 +0000
committer Colm MacCarthaigh <colm@apache.org> 2005-10-24 01:52:56 +0000
commit 811b2867abef4a3ecf8d7b880e9ca97513755198 (patch)
tree 7f19c67b5e4cf04f44f7fb2312d45e3770caaa86 /support/logresolve.c
parent 5f4f3233d490440327a8b47d83cf95d774fb42f3 (diff)
download httpd-811b2867abef4a3ecf8d7b880e9ca97513755198.tar.gz
A complete rewrite of support/logresolve.c:
* Now uses APR
* Supports IPv6 addresses
* Slightly less hurtful on memory
* archaic (but interesting) notes marked as historical
* Will not perform very well without APR >= 1.3 due to lack of buffered file-io.

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@327909 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'support/logresolve.c')
-rw-r--r-- support/logresolve.c 529
1 files changed, 230 insertions, 299 deletions
diff --git a/support/logresolve.c b/support/logresolve.c
index e497d2e0c3..1eb1c7472d 100644
--- a/support/logresolve.c
+++ b/support/logresolve.c
@@ -15,12 +15,13 @@
*/
/*
- * logresolve 1.1
+ * logresolve 2.0
*
* Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
* UUNET Canada, April 16, 1995
*
* Rewritten by David Robinson. (drtr ast.cam.ac.uk)
+ * Rewritten again, and ported to APR by Colm MacCarthaigh
*
* Usage: logresolve [-s filename] [-c] < access_log > new_log
*
@@ -28,7 +29,7 @@
* -s filename name of a file to record statistics
* -c check the DNS for a matching A record for the host.
*
- * Notes:
+ * Notes: (For historical interest)
*
* To generate meaningful statistics from an HTTPD log file, it's good
* to have the domain name of each machine that accessed your site, but
@@ -55,333 +56,263 @@
* that one of these matches the original address.
*/
+#include "apr.h"
#include "apr_lib.h"
-#if APR_HAVE_STDIO_H
-#include <stdio.h>
-#endif
+#include "apr_hash.h"
+#include "apr_getopt.h"
+#include "apr_strings.h"
+#include "apr_file_io.h"
+#include "apr_network_io.h"
+
#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif
-#if APR_HAVE_CTYPE_H
-#include <ctype.h>
-#endif
-#if APR_HAVE_NETDB_H
-#include <netdb.h>
-#endif
-#if APR_HAVE_NETINET_IN_H
-#include <netinet/in.h>
-#endif
-#if APR_HAVE_STRING_H
-#include <string.h>
-#endif
-#if APR_HAVE_SYS_SOCKET_H
-#include <sys/socket.h>
-#endif
-#if APR_HAVE_ARPA_INET_H
-#include <arpa/inet.h>
-#endif
-
-static void cgethost(struct in_addr ipnum, char *string, int check);
-static int get_line(char *s, int n);
-static void stats(FILE *output);
-
-#ifdef BEOS
-#define NO_ADDRESS NO_DATA
-#endif
-
-
-/* maximum line length */
-#ifndef MAXLINE
-#define MAXLINE 1024
-#endif
-
-/* maximum length of a domain name */
-#ifndef MAXDNAME
-#define MAXDNAME 256
-#endif
-/* number of buckets in cache hash apr_table_t */
-#define BUCKETS 256
-
-/*
- * struct nsrec - record of nameservice for cache linked list
- *
- * ipnum - IP number hostname - hostname noname - nonzero if IP number has no
- * hostname, i.e. hostname=IP number
- */
-
-struct nsrec {
- struct in_addr ipnum;
- char *hostname;
- int noname;
- struct nsrec *next;
-} *nscache[BUCKETS];
-
-/*
- * statistics - obvious
- */
-
-#ifndef h_errno
-#ifdef __CYGWIN__
-extern __declspec(dllimport) int h_errno;
-#else
-extern int h_errno; /* some machines don't have this in their headers */
-#endif
-#endif
-
-/* largest value for h_errno */
-
-#define MAX_ERR (NO_ADDRESS)
-#define UNKNOWN_ERR (MAX_ERR+1)
-#define NO_REVERSE (MAX_ERR+2)
+static apr_file_t *errfile;
+static const char *shortname = "logresolve";
+static apr_hash_t *cache;
+/* Statistics */
static int cachehits = 0;
static int cachesize = 0;
static int entries = 0;
static int resolves = 0;
static int withname = 0;
-static int errors[MAX_ERR + 3];
-
-/*
- * cgethost - gets hostname by IP address, caching, and adding unresolvable
- * IP numbers with their IP number as hostname, setting noname flag
- */
-
-static void cgethost (struct in_addr ipnum, char *string, int check)
-{
- struct nsrec **current, *new;
- struct hostent *hostdata;
- char *name;
-
- current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
- (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
-
- while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
- current = &(*current)->next;
-
- if (*current == NULL) {
- cachesize++;
- new = (struct nsrec *) malloc(sizeof(struct nsrec));
- if (new == NULL) {
- perror("malloc");
- fprintf(stderr, "Insufficient memory\n");
- exit(1);
- }
- *current = new;
- new->next = NULL;
-
- new->ipnum = ipnum;
-
- hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
- AF_INET);
- if (hostdata == NULL) {
- if (h_errno > MAX_ERR)
- errors[UNKNOWN_ERR]++;
- else
- errors[h_errno]++;
- new->noname = h_errno;
- name = strdup(inet_ntoa(ipnum));
- }
- else {
- new->noname = 0;
- name = strdup(hostdata->h_name);
- if (check) {
- if (name == NULL) {
- perror("strdup");
- fprintf(stderr, "Insufficient memory\n");
- exit(1);
- }
- hostdata = gethostbyname(name);
- if (hostdata != NULL) {
- char **hptr;
-
- for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
- if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
- break;
- if (*hptr == NULL)
- hostdata = NULL;
- }
- if (hostdata == NULL) {
- fprintf(stderr, "Bad host: %s != %s\n", name,
- inet_ntoa(ipnum));
- new->noname = NO_REVERSE;
- free(name);
- name = strdup(inet_ntoa(ipnum));
- errors[NO_REVERSE]++;
- }
- }
- }
- new->hostname = name;
- if (new->hostname == NULL) {
- perror("strdup");
- fprintf(stderr, "Insufficient memory\n");
- exit(1);
- }
- }
- else
- cachehits++;
-
- /* size of string == MAXDNAME +1 */
- strncpy(string, (*current)->hostname, MAXDNAME);
- string[MAXDNAME] = '\0';
-}
+static int doublefailed = 0;
+static int noreverse = 0;
/*
* prints various statistics to output
*/
-
-static void stats (FILE *output)
+#define NL APR_EOL_STR
+static void print_statistics (apr_file_t *output)
{
- int i;
- char *ipstring;
- struct nsrec *current;
- char *errstring[MAX_ERR + 3];
-
- for (i = 0; i < MAX_ERR + 3; i++)
- errstring[i] = "Unknown error";
- errstring[HOST_NOT_FOUND] = "Host not found";
- errstring[TRY_AGAIN] = "Try again";
- errstring[NO_RECOVERY] = "Non recoverable error";
- errstring[NO_DATA] = "No data record";
- errstring[NO_ADDRESS] = "No address";
- errstring[NO_REVERSE] = "No reverse entry";
-
- fprintf(output, "logresolve Statistics:\n");
-
- fprintf(output, "Entries: %d\n", entries);
- fprintf(output, " With name : %d\n", withname);
- fprintf(output, " Resolves : %d\n", resolves);
- if (errors[HOST_NOT_FOUND])
- fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
- if (errors[TRY_AGAIN])
- fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
- if (errors[NO_DATA])
- fprintf(output, " - No data : %d\n", errors[NO_DATA]);
- if (errors[NO_ADDRESS])
- fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
- if (errors[NO_REVERSE])
- fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);
- fprintf(output, "Cache hits : %d\n", cachehits);
- fprintf(output, "Cache size : %d\n", cachesize);
- fprintf(output, "Cache buckets : IP number * hostname\n");
+ apr_file_printf(output, "logresolve Statistics:" NL);
+ apr_file_printf(output, "Entries: %d" NL, entries);
+ apr_file_printf(output, " With name : %d" NL, withname);
+ apr_file_printf(output, " Resolves : %d" NL, resolves);
+
+ if (noreverse) {
+ apr_file_printf(output, " - No reverse : %d" NL,
+ noreverse);
+ }
+
+ if (doublefailed) {
+ apr_file_printf(output, " - Double lookup failed : %d" NL,
+ doublefailed);
+ }
- for (i = 0; i < BUCKETS; i++)
- for (current = nscache[i]; current != NULL; current = current->next) {
- ipstring = inet_ntoa(current->ipnum);
- if (current->noname == 0)
- fprintf(output, " %3d %15s - %s\n", i, ipstring,
- current->hostname);
- else {
- if (current->noname > MAX_ERR + 2)
- fprintf(output, " %3d %15s : Unknown error\n", i,
- ipstring);
- else
- fprintf(output, " %3d %15s : %s\n", i, ipstring,
- errstring[current->noname]);
- }
- }
+ apr_file_printf(output, "Cache hits : %d" NL, cachehits);
+ apr_file_printf(output, "Cache size : %d" NL, cachesize);
}
-
/*
- * gets a line from stdin
+ * usage info
*/
-
-static int get_line (char *s, int n)
+static void usage(void)
{
- char *cp;
-
- if (!fgets(s, n, stdin))
- return (0);
- cp = strchr(s, '\n');
- if (cp)
- *cp = '\0';
- return (1);
+ apr_file_printf(errfile,
+ "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
+ "Usage: %s [-s STATFILE] [-c]" NL
+ NL
+ "Options:" NL
+ " -s Record statistics to STATFILE when finished." NL
+ NL
+ " -c Perform double lookups when resolving IP addresses." NL,
+ shortname, shortname);
+ exit(1);
}
-
-int main (int argc, char *argv[])
+#undef NL
+
+int main(int argc, const char * const argv[])
{
- struct in_addr ipnum;
- char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
- int i, check;
-
-#if defined(WIN32) || (defined(NETWARE) && defined(USE_WINSOCK))
- /* If we apr'ify this code, apr_pool_create/apr_pool_destroy
- * should perform the WSAStartup/WSACleanup for us.
- */
- WSADATA wsaData;
- WSAStartup(MAKEWORD(2, 0), &wsaData);
+ apr_file_t * outfile;
+ apr_file_t * infile;
+ apr_file_t * statsfile;
+ apr_sockaddr_t * ip;
+ apr_sockaddr_t * ipdouble;
+ apr_getopt_t * o;
+ apr_pool_t * pool;
+ apr_status_t status;
+ const char * arg;
+ char opt;
+ char * stats = NULL;
+ char * space;
+ char * hostname;
+#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
+ char * inbuffer;
+ char * outbuffer;
#endif
-
- check = 0;
- statfile = NULL;
- for (i = 1; i < argc; i++) {
- if (strcmp(argv[i], "-c") == 0)
- check = 1;
- else if (strcmp(argv[i], "-s") == 0) {
- if (i == argc - 1) {
- fprintf(stderr, "logresolve: missing filename to -s\n");
- exit(1);
- }
- i++;
- statfile = argv[i];
- }
- else {
- fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output\n");
- exit(0);
- }
+ char line[2048];
+ int doublelookups = 0;
+
+ if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
+ return 1;
}
-
- for (i = 0; i < BUCKETS; i++)
- nscache[i] = NULL;
- for (i = 0; i < MAX_ERR + 2; i++)
- errors[i] = 0;
-
- while (get_line(line, MAXLINE)) {
- if (line[0] == '\0')
- continue;
- entries++;
- if (!apr_isdigit(line[0])) { /* short cut */
- puts(line);
- withname++;
- continue;
- }
- bar = strchr(line, ' ');
- if (bar != NULL)
- *bar = '\0';
- ipnum.s_addr = inet_addr(line);
- if (ipnum.s_addr == 0xffffffffu) {
- if (bar != NULL)
- *bar = ' ';
- puts(line);
- withname++;
- continue;
- }
-
- resolves++;
-
- cgethost(ipnum, hoststring, check);
- if (bar != NULL)
- printf("%s %s\n", hoststring, bar + 1);
- else
- puts(hoststring);
+ atexit(apr_terminate);
+
+ if (argc) {
+ shortname = apr_filepath_name_get(argv[0]);
}
-#if defined(WIN32) || (defined(NETWARE) && defined(USE_WINSOCK))
- WSACleanup();
+ if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
+ return 1;
+ }
+ apr_file_open_stderr(&errfile, pool);
+ apr_getopt_init(&o, pool, argc, argv);
+
+ while (1) {
+ status = apr_getopt(o, "s:c", &opt, &arg);
+ if (status == APR_EOF) {
+ break;
+ }
+ else if (status != APR_SUCCESS) {
+ usage();
+ }
+ else {
+ switch (opt) {
+ case 'c':
+ if (doublelookups) {
+ usage();
+ }
+ doublelookups = 1;
+ break;
+ case 's':
+ if (stats) {
+ usage();
+ }
+ stats = apr_pstrdup(pool, arg);
+ break;
+ } /* switch */
+ } /* else */
+ } /* while */
+
+ apr_file_open_stdout(&outfile, pool);
+ apr_file_open_stdin(&infile, pool);
+
+#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
+ /* Allocate two new 10k file buffers */
+ if ((outbuffer = apr_palloc(pool, 10240)) == NULL ||
+ (inbuffer = apr_palloc(pool, 10240)) == NULL) {
+ return 1;
+ }
+
+ /* Set the buffers */
+ apr_file_buffer_set(infile, inbuffer, 10240);
+ apr_file_buffer_set(outfile, outbuffer, 10240);
#endif
+
+ cache = apr_hash_make(pool);
+
+ while(apr_file_gets(line, 2048, infile) == APR_SUCCESS) {
+ if (line[0] == '\0') {
+ continue;
+ }
+
+ /* Count our log entries */
+ entries++;
+
+ /* Check if this could even be an IP address */
+ if (!apr_isxdigit(line[0]) && line[0] != ':') {
+ withname++;
+ apr_file_puts(line, outfile);
+ continue;
+ }
+
+ /* Terminate the line at the next space */
+ if((space = strchr(line, ' ')) != NULL) {
+ *space = '\0';
+ }
+
+ /* See if we have it in our cache */
+ hostname = (char *) apr_hash_get(cache, (const void *)line,
+ strlen(line));
+ if (hostname) {
+ apr_file_printf(outfile, "%s %s", hostname, space + 1);
+ cachehits++;
+ continue;
+ }
+
+ /* Parse the IP address */
+ status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC ,0, 0, pool);
+ if (status != APR_SUCCESS) {
+ /* Not an IP address */
+ withname++;
+ *space = ' ';
+ apr_file_puts(line, outfile);
+ continue;
+ }
+
+ /* This does not make much sense, but historically "resolves" means
+ * "parsed as an IP address". It does not mean we actually resolved
+ * the IP address into a hostname.
+ */
+ resolves++;
+
+ /* From here on out we cache each result, even if it was not
+ * successful
+ */
+ cachesize++;
+
+ /* Try and perform a reverse lookup */
+ status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
+ if (status || hostname == NULL) {
+ /* Could not perform a reverse lookup */
+ *space = ' ';
+ apr_file_puts(line, outfile);
+ noreverse++;
+
+ /* Add to cache */
+ *space = '\0';
+ apr_hash_set(cache, (const void *) line, strlen(line),
+ (const void *) apr_pstrdup(pool, line));
+ continue;
+ }
+
+ /* Perform a double lookup */
+ if (doublelookups) {
+ /* Do a forward lookup on our hostname, and see if that matches our
+ * original IP address.
+ */
+ status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
+ 0, pool);
+ if (status == APR_SUCCESS ||
+ memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
+ /* Double-lookup failed */
+ *space = ' ';
+ apr_file_puts(line, outfile);
+ doublefailed++;
+
+ /* Add to cache */
+ *space = '\0';
+ apr_hash_set(cache, (const void *) line, strlen(line),
+ (const void *) apr_pstrdup(pool, line));
+ continue;
+ }
+ }
+
+ /* Output the resolved name */
+ apr_file_printf(outfile, "%s %s", hostname, space + 1);
+
+ /* Store it in the cache */
+ apr_hash_set(cache, (const void *) line, strlen(line),
+ (const void *) apr_pstrdup(pool, hostname));
+ }
- if (statfile != NULL) {
- FILE *fp;
- fp = fopen(statfile, "w");
- if (fp == NULL) {
- fprintf(stderr, "logresolve: could not open statistics file '%s'\n"
- ,statfile);
- exit(1);
- }
- stats(fp);
- fclose(fp);
+ /* Flush any remaining output */
+ apr_file_flush(outfile);
+
+ if (stats) {
+ if (apr_file_open(&statsfile, stats,
+ APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
+ APR_OS_DEFAULT, pool) != APR_SUCCESS) {
+ apr_file_printf(errfile, "%s: Could not open %s for writing.",
+ shortname, stats);
+ return 1;
+ }
+ print_statistics(statsfile);
+ apr_file_close(statsfile);
}
- return (0);
+ return 0;
}