generate webalizer stats

author: Nikos Mavrogiannopoulos <nmav@gnutls.org> 2012-12-05 23:45:03 +0100
committer: Nikos Mavrogiannopoulos <nmav@gnutls.org> 2012-12-05 23:45:03 +0100
commit: fb5811f60e0716132db26c35cdb9f21af79ed9ff (patch)
tree: 4bc6a962d649ea97171ccd14d2eb114036e95b6d
parent: b33145bc8d4ae5fc41a321d8c8178ea368db3fbc (diff)
download: gnutls-fb5811f60e0716132db26c35cdb9f21af79ed9ff.tar.gz
3 files changed, 872 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index b25c15b81f..86c315eb6a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 *~
+stats/
diff --git a/Makefile b/Makefile
index c700ef5db8..a64a6a87f5 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,16 @@ all: $(OUTPUT) news.atom
 	@for i in news-entries/*.xml;do X=0; if ! test -e $$i.tweet;then X=1;fi;done;if test "$$X" = "1";then echo "There are unsubmitted news. Use 'make tweet'.";fi
 #	cvs commit -m "Generated." .
 
-.PHONY: clean manual/index.html tweet
+.PHONY: clean manual/index.html tweet stats
+
+logs/all.log:
+	mkdir -p logs && cd logs && rsync -av trithemius.gnupg.org:/var/log/boa/www.gnutls.org-access* .
+	cd logs && for i in *access*;do zcat -f $$i >all.log;done
+
+stats: logs/all.log
+	mkdir -p stats
+	cut -f '2-' -d ' ' --output-delimiter=" " <logs/all.log >logs/new.log
+	webalizer -c stats/webalizer.conf logs/new.log -o stats/ -Dcache.db
 
 manual/index.html: manual/index.html.bak
 	@cp -f manual/index.html.bak $@
diff --git a/stats/webalizer.conf b/stats/webalizer.conf
new file mode 100644
index 0000000000..9d87f30e9b
--- /dev/null
+++ b/stats/webalizer.conf
@@ -0,0 +1,861 @@
+#
+# Sample Webalizer Xtended configuration file
+# Copyright 2005-2011 by Patrick K. Frei (pa_frei@bluewin.ch)
+# Based on the sample Webalizer configuration file
+# Copyright 1997-2011 by Bradford L. Barrett
+#
+# Distributed under the GNU General Public License.  See the
+# files "Copyright" and "COPYING" provided with the webalizer
+# distribution for additional information.
+#
+# This is a sample configuration file for the Webalizer (ver 2.20)
+# Lines starting with pound signs '#' are comment lines and are
+# ignored.  Blank lines are skipped as well.  Other lines are considered
+# as configuration lines, and have the form "ConfigOption  Value" where
+# ConfigOption is a valid configuration keyword, and Value is the value
+# to assign that configuration option.  Invalid keyword/values are
+# ignored, with appropriate warnings being displayed.  There must be
+# at least one space or tab between the keyword and its value.
+#
+# As of version 0.98, The Webalizer will look for a 'default' configuration
+# file named "webalizer.conf" in the current directory, and if not found
+# there, will look for "/etc/webalizer.conf".
+
+
+# LogFile defines the web server log file to use.  If not specified
+# here or on on the command line, input will default to STDIN.  If
+# the log filename ends in '.gz' (a gzip compressed file), or '.bz2'
+# (bzip2 compressed file), it will be decompressed on the fly as it
+# is being read.
+
+#LogFile        /var/lib/httpd/logs/access_log
+
+# LogType defines the log type being processed.  Normally, the Webalizer
+# expects a CLF or Combined web server log as input.  Using this option,
+# you can process ftp logs (xferlog as produced by wu-ftp and others),
+# Squid native logs or W3C extended format web logs. Values can be 'clf',
+# 'ftp', 'squid' or 'w3c'.  The default is 'clf'.
+
+#LogType	clf
+
+# OutputDir is where you want to put the output files.  This should
+# should be a full path name, however relative ones might work as well.
+# If no output directory is specified, the current directory will be used.
+
+#OutputDir      /var/lib/httpd/htdocs/usage
+
+# HistoryName allows you to specify the name of the history file produced
+# by the Webalizer.  The history file keeps the data for previous months,
+# and is used for generating the main HTML page (index.html). The default
+# is a file named "webalizer.hist", stored in the output directory being
+# used.  The name can include a path, which will be relative to the output
+# directory unless absolute (starts with a leading '/').
+
+#HistoryName	webalizer.hist
+
+# Incremental processing allows multiple partial log files to be used
+# instead of one huge one.  Useful for large sites that have to rotate
+# their log files more than once a month.  The Webalizer will save its
+# internal state before exiting, and restore it the next time run, in
+# order to continue processing where it left off.  This mode also causes
+# The Webalizer to scan for and ignore duplicate records (records already
+# processed by a previous run).  See the README file for additional
+# information.  The value may be 'yes' or 'no', with a default of 'no'.
+# The file 'webalizer.current' is used to store the current state data,
+# and is located in the output directory of the program (unless changed
+# with the IncrementalName option below).  Please read at least the section
+# on Incremental processing in the README file before you enable this option.
+
+#Incremental	no
+
+# IncrementalName allows you to specify the filename for saving the
+# incremental data in.  It is similar to the HistoryName option where the
+# name is relative to the specified output directory, unless an absolute
+# filename is specified.  The default is a file named "webalizer.current"
+# kept in the normal output directory.  If you don't specify "Incremental"
+# as 'yes' then this option has no meaning.
+
+#IncrementalName	webalizer.current
+
+# ReportTitle is the text to display as the title.  The hostname
+# (unless blank) is appended to the end of this string (seperated with
+# a space) to generate the final full title string.
+# Default is (for english) "Usage Statistics for".
+
+ReportTitle    Usage Statistics for
+
+# HostName defines the hostname for the report.  This is used in
+# the title, and is prepended to the URL table items.  This allows
+# clicking on URLs in the report to go to the proper location in
+# the event you are running the report on a 'virtual' web server,
+# or for a server different than the one the report resides on.
+# If not specified here, or on the command line, webalizer will
+# try to get the hostname via a uname system call.  If that fails,
+# it will default to "localhost".
+
+HostName	www.gnutls.org
+
+# HTMLExtension allows you to specify the filename extension to use
+# for generated HTML pages.  Normally, this defaults to "html", but
+# can be changed for sites who need it (like for PHP embeded pages).
+
+#HTMLExtension  html
+
+# PageType lets you tell the Webalizer what types of URLs you
+# consider a 'page'.  Most people consider html and cgi documents
+# as pages, while not images and audio files.  If no types are
+# specified, defaults will be used ('htm*', 'cgi' and HTMLExtension
+# if different for web logs, 'txt' for ftp logs).
+
+PageType	htm*
+PageType	cgi
+PageType	atom
+#PageType	phtml
+#PageType	php
+#PageType	php3
+#PageType	pl
+
+# PagePrefix allows all requests with a specified prefix to be
+# considered as 'pages'. If you want everything under /documents
+# to be treated as pages no matter what their extension is. Also
+# useful if you have cgi-scripts with PATH_INFO.
+
+#PagePrefix	/documents
+#PagePrefix	/mycgi/parameters
+
+# OmitPage lets you tell the Webalizer that certain URLs do not
+# contain any pages.  No URL matching an OmitPage value will be
+# counted as a page, even if it matches a PageType above or has
+# no extension (e.g., a directory).  They will still be counted
+# as a hit.
+
+#OmitPage	/render
+
+# UseHTTPS should be used if the analysis is being run on a
+# secure server, and links to urls should use 'https://' instead
+# of the default 'http://'.  If you need this, set it to 'yes'.
+# Default is 'no'.  This only changes the behaviour of the 'Top
+# URLs' table.
+
+#UseHTTPS       no
+
+# HTAccess allows the generation of a default .htaccess file in the
+# output directory.  If enabled, a default .htaccess file will be
+# created (with a single "DirectoryIndex" directive), unless one
+# already exists.  Values may be 'yes' or 'no', with 'no'
+# being the default (don't write .htaccess files).
+
+#HTAccess	no
+
+# StripCGI determines if URL CGI variables should be striped or not.
+# Historically, the Webalizer stripped all CGI variables from the end
+# of URLs to improve accuracy.  Some sites may prefer to keep the CGI
+# variables in place, particularly those with highly dynamic pages.
+# Values may be 'yes' or 'no', with the default being 'yes'.
+
+#StripCGI	yes
+
+# The TrimSquidURL option only has effect on squid type log files.
+# When analyzing a squid log, it is usually desirable to have less
+# granularity on the URLs.  TrimSquidURL = n where n is a number > 0
+# causes all URLs to be truncated after the nth '/' after the http://
+# portion.  Setting TrimSquidURL to one (1) will cause all URLs to be
+# summarized by domain only.  The default is zero (0), which disables
+# any such truncation and preserve the URLs as they are in the log.
+
+# TrimSquidURL	0
+
+# DNSCache specifies the DNS cache filename to use for reverse DNS lookups.
+# This file must be specified if you wish to perform name lookups on any IP
+# addresses found in the log file.  If an absolute path is not given as
+# part of the filename (ie: starts with a leading '/'), then the name is
+# relative to the default output directory.  See the DNS.README file for
+# additional information.
+
+DNSCache	cache.db
+
+# DNSChildren allows you to specify how many "children" processes are
+# run to perform DNS lookups to create or update the DNS cache file.
+# If a number is specified, the DNS cache file will be created/updated
+# each time the Webalizer is run, immediately prior to normal processing,
+# by running the specified number of "children" processes to perform
+# DNS lookups.  If used, the DNS cache filename MUST be specified as
+# well.  The default value is zero (0), which disables DNS cache file
+# creation/updates at run time.  The number of children processes to
+# run may be anywhere from 1 to 100, however a large number may effect
+# normal system operations.  Reasonable values should be between 5 and
+# 20.  See the DNS.README file for additional information.
+
+DNSChildren	17
+
+# CacheIPs allows unresolved IP addresses to be cached in the DNS
+# database.  Normally, only resolved addresses are saved.  At some
+# sites, particularly those with a large number of unresolvable IP
+# addresses visiting, it may be useful to enable this feature so
+# those addresses are not constantly looked up each time the program
+# is run.  Values can be 'yes' or 'no', with 'no' being the default.
+
+CacheIPs	yes
+
+# CacheTTL specifies the time to live (TTL) value for cached DNS
+# entries, in days.  This value may be anywhere between 1 and 100
+# with the default being 7 days (1 week).
+
+CacheTTL	15
+
+# The GeoDB option enables or disabled the use of the native
+# Webalizer GeoDB geolocation services.  This is the preferred
+# geolocation option.  Values may be 'yes' or 'no', with 'no'
+# being the default.
+
+#GeoDB		no
+
+# GeoDBDatabase specifies an alternate database to use.  The
+# default database is /usr/share/GeoDB/GeoDB.dat (however the
+# path may be changed at compile time; use the -vV command
+# line option to determine where).  If a different database is
+# to be used, it may be specified here.  The name is relative
+# to the output directory being used unless an absolute name
+# (ie: starts with a leading '/') is specified.
+
+#GeoDBDatabase	/usr/share/GeoDB/GeoDB.dat
+
+# The GeoIP option enables or disables the use of geolocation
+# services provided by the GeoIP library (http://www.maxmind.com),
+# if available.  Values may be 'yes' or 'no, with 'no' being the
+# default.  Note: if GeoDB is enabled, then this option will have
+# no effect (GeoDB will be used regardless of this setting).
+
+GeoIP		yes
+
+# GeoIPDatabase specifies an alternate database filename to use by the
+# GeoIP library.  If an absolute path is not given as part of the name
+# (ie: starts with a leading '/'), then the name is relative to the
+# default output directory. This option should not normally be needed.
+
+GeoIPDatabase	/usr/share/GeoIP/GeoIP.dat
+
+# HTMLPre defines HTML code to insert at the very beginning of the
+# file.  Default is the DOCTYPE line shown below.  Max line length
+# is 80 characters, so use multiple HTMLPre lines if you need more.
+
+#HTMLPre <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+# HTMLHead defines HTML code to insert within the <HEAD></HEAD>
+# block, immediately after the <TITLE> line.  Maximum line length
+# is 80 characters, so use multiple lines if needed.
+
+#HTMLHead <META NAME="author" CONTENT="The Webalizer">
+#HTMLHead <META NAME="ROBOTS" CONTENT="NOINDEX, NOFOLLOW">
+
+# HTMLBody defined the HTML code to be inserted, starting with the
+# <BODY> tag.  If not specified, the default is shown below.  If
+# used, you MUST include your own <BODY> tag as the first line.
+# Maximum line length is 80 char, use multiple lines if needed.
+
+#HTMLBody <BODY BGCOLOR="#E8E8E8" TEXT="#000000" LINK="#0000FF" VLINK="#FF0000">
+
+# HTMLPost defines the HTML code to insert immediately before the
+# first <HR> on the document, which is just after the title and
+# "summary period"-"Generated on:" lines.  If anything, this should
+# be used to clean up in case an image was inserted with HTMLBody.
+# As with HTMLHead, you can define as many of these as you want and
+# they will be inserted in the output stream in order of apperance.
+# Max string size is 80 characters.  Use multiple lines if you need to.
+
+#HTMLPost 	<BR CLEAR="all">
+
+# HTMLTail defines the HTML code to insert at the bottom of each
+# HTML document, usually to include a link back to your home
+# page or insert a small graphic.  It is inserted as a table
+# data element (ie: <TD> your code here </TD>) and is right
+# alligned with the page.  Max string size is 80 characters.
+
+#HTMLTail <IMG SRC="msfree.png" ALT="100% Micro$oft free!">
+
+# HTMLEnd defines the HTML code to add at the very end of the
+# generated files.  It defaults to what is shown below.  If
+# used, you MUST specify the </BODY> and </HTML> closing tags
+# as the last lines.  Max string length is 80 characters.
+
+#HTMLEnd </BODY></HTML>
+
+# The LinkReferrer option determines if entries in the referrer table
+# should be plain text or a HTML link to the referrer.  Values can be
+# either 'yes' or 'no', with 'no' being the default.
+
+#LinkReferrer	no
+
+# The Quiet option suppresses output messages... Useful when run
+# as a cron job to prevent bogus e-mails.  Values can be either
+# "yes" or "no".  Default is "no".  Note: this does not suppress
+# warnings and errors (which are printed to stderr).
+
+#Quiet		no
+
+# ReallyQuiet will supress all messages including errors and
+# warnings.  Values can be 'yes' or 'no' with 'no' being the
+# default.  If 'yes' is used here, it cannot be overriden from
+# the command line, so use with caution.  A value of 'no' has
+# no effect.
+
+#ReallyQuiet	no
+
+# TimeMe allows you to force the display of timing information
+# at the end of processing.  A value of 'yes' will force the
+# timing information to be displayed.  A value of 'no' has no
+# effect.
+
+#TimeMe		no
+
+# GMTTime allows reports to show GMT (UTC) time instead of local
+# time.  Default is to display the time the report was generated
+# in the timezone of the local machine, such as EDT or PST.  This
+# keyword allows you to have times displayed in UTC instead.  Use
+# only if you really have a good reason, since it will probably
+# screw up the reporting periods by however many hours your local
+# time zone is off of GMT.
+
+#GMTTime	no
+
+# Debug prints additional information for error messages.  This
+# will cause webalizer to dump bad records/fields instead of just
+# telling you it found a bad one.   As usual, the value can be
+# either "yes" or "no".  The default is "no".  It shouldn't be
+# needed unless you start getting a lot of Warning or Error
+# messages and want to see why.  (Note: warning and error messages
+# are printed to stderr, not stdout like normal messages).
+
+#Debug		no
+
+# FoldSeqErr forces the Webalizer to ignore sequence errors.
+# This is useful for Netscape and other web servers that cache
+# the writing of log records and do not guarentee that they
+# will be in chronological order.  The use of the FoldSeqErr
+# option will cause out of sequence log records to be treated
+# as if they had the same time stamp as the last valid record.
+# Default is to ignore out of sequence log records.  The use
+# of this feature is strongly discouraged and rarely needed.
+# (the webalizer already compensates for up to 60 minutes of
+# difference between records).
+
+#FoldSeqErr	no
+
+# VisitTimeout allows you to set the default timeout for a visit
+# (sometimes called a 'session').  The default is 30 minutes,
+# which should be fine for most sites.
+# Visits are determined by looking at the time of the current
+# request, and the time of the last request from the site.  If
+# the time difference is greater than the VisitTimeout value, it
+# is considered a new visit, and visit totals are incremented.
+# Value is the number of seconds to timeout (default=1800=30min)
+
+#VisitTimeout	1800
+
+# IgnoreHist shouldn't be used in a config file, but it is here
+# just because it might be usefull in certain situations.  If the
+# history file is ignored, the main "index.html" file will only
+# report on the current log files contents.  Usefull only when you
+# want to reproduce the reports from scratch.  USE WITH CAUTION!
+# Valid values are "yes" or "no".  Default is "no".
+
+#IgnoreHist	no
+
+# IgnoreState also shouldn't be used, but is here anyway.  It is
+# similar to the IgnoreHist option, but for the incremental data
+# file.  If this is set to 'yes', any existing incrememtal data
+# will be ignored and a new data file will be written at the end
+# of processing.  USE WITH CAUTION.  By ignoring an existing
+# incremental data file, all previous processing for the current
+# month will be lost, and those logs must be re-processed.
+# Valid values are "yes" or "no".  Default is "no".
+
+#IgnoreState	no
+
+# ResponseGraph allows the response code graph to be disabled.
+# Values can be 'yes' or 'no', default is 'yes'.
+
+#ResponseGraph	yes
+
+# CountryGraph allows the usage by country graph to be disabled.
+# Values can be 'yes' or 'no', default is 'yes'.
+
+#CountryGraph	yes
+
+# CountryFlags allows flags to be displayed in the top country
+# table in monthly reports.  Values can be 'yes' or 'no', with
+# the default being 'no'.
+
+#CountryFlags	no
+
+# FlagDir specifies the location of flag graphics which will be
+# used in the top country table.  If not specified, the default
+# is to look in the 'flags' directory directly under the output
+# directory being used for the reports.  If this option is used,
+# the display of flag graphics will be enabled by default.
+
+#FlagDir	flags
+
+# DailyGraph and DailyStats allows the daily statistics graph
+# and statistics table to be disabled (not displayed).  Values
+# may be "yes" or "no". Default is "yes".
+
+#DailyGraph	yes
+#DailyStats	yes
+
+# HourlyGraph and HourlyStats allows the hourly statistics graph
+# and statistics table to be disabled (not displayed).  Values
+# may be "yes" or "no". Default is "yes".
+
+#HourlyGraph	yes
+#HourlyStats	yes
+
+# GraphLegend allows the color coded legends to be turned on or off
+# in the graphs.  The default is for them to be displayed.  This only
+# toggles the color coded legends, the other legends are not changed.
+# If you think they are hideous and ugly, say 'no' here :)
+
+#GraphLegend	yes
+
+# GraphLines allows you to have index lines drawn behind the graphs.
+# I personally am not crazy about them, but a lot of people requested
+# them and they weren't a big deal to add.  The number represents the
+# number of lines you want displayed.  Default is 2, you can disable
+# the lines by using a value of zero ('0').  [max is 20]
+# Note, due to rounding errors, some values don't work quite right.
+# The lower the better, with 1,2,3,4,6 and 10 producing nice results.
+
+#GraphLines	4
+
+# IndexMonths defines the number of months to display in the main index
+# (yearly summary) table.  Value can be between 12 and 120, with the
+# default being 12 months (1 year).
+
+#IndexMonths	12
+
+# YearHeaders enables/disables the display of year headers in the main
+# index (yearly summary) table.  If enabled, year headers will be shown
+# when the table is displaying more than 16 months worth of data. Values
+# can be 'yes' or 'no', with 'yes' being the default.
+
+#YearHeaders	yes
+
+# YearTotals enables/disables the display of yearly totals in the main
+# index (yearly summary) table.  If enabled, year totals will be shown
+# when the table is displaying more than 16 months worth of data.  Values
+# can be 'yes' or 'no', with 'yes' being the default.
+
+#YearTotals	yes
+
+# GraphMonths defines the number of months to display in the main index
+# (yearly summary) graph.  Value can be between 12 and 72 months, with
+# the default being 12 months.
+
+#GraphMonths	12
+
+# The "Top" options below define the number of entries for each table.
+# Defaults are Sites=30, URLs=30, Referrers=30 and Agents=15, and
+# Countries=30. TopKSites and TopKURLs (by KByte tables) both default
+# to 10, as do the top entry/exit tables (TopEntry/TopExit).  The top
+# search strings and usernames default to 20.  Tables may be disabled
+# by using zero (0) for the value.
+
+#TopSites        30
+#TopKSites       10
+#TopURLs         30
+#TopKURLs        10
+#TopReferrers    30
+#TopAgents       15
+#TopCountries    30
+#TopEntry        10
+#TopExit         10
+#TopSearch       20
+#TopUsers        20
+
+# The All* keywords allow the display of all URL's, Sites, Referrers, User
+# Agents, Search Strings, Usernames and 404-errors.  If enabled, a seperate
+# HTML page will be created, and a link will be added to the bottom
+# of the appropriate "Top" table.  There are a couple of conditions
+# for this to occur.  First, there must be more items than will fit
+# in the "Top" table (otherwise it would just be duplicating what is
+# already displayed).  Second, the listing will only show those items
+# that are normally visable, which means it will not show any hidden
+# items.  Grouped entries will be listed first, followed by individual
+# items.  The value for these keywords can be either 'yes' or 'no',
+# with the default being 'no' (exc. 404-errors). Please be aware that these
+# pages can be quite large in size, particularly the sites page, and seperate
+# pages are generated for each month, which can consume quite a lot
+# of disk space depending on the traffic to your site.
+
+#AllSites	no
+#AllURLs	no
+#AllReferrers	no
+#AllAgents	no
+#AllSearchStr	no
+#AllUsers       no
+#AllErrors      yes
+
+# The Webalizer normally strips the string 'index.' off the end of
+# URLs in order to consolidate URL totals.  For example, the URL
+# /somedir/index.html is turned into /somedir/ which is really the
+# same URL.  This option allows you to specify additional strings
+# to treat in the same way.  You don't need to specify 'index.' as
+# it is always scanned for by The Webalizer, this option is just to
+# specify _additional_ strings if needed.  If you don't need any,
+# don't specify any as each string will be scanned for in EVERY
+# log record... A bunch of them will degrade performance.  Also,
+# the string is scanned for anywhere in the URL, so a string of
+# 'home' would turn the URL /somedir/homepages/brad/home.html into
+# just /somedir/ which is probably not what was intended.
+
+#IndexAlias     home.htm
+#IndexAlias	homepage.htm
+
+# The DefaultIndex option is used to enable/disable the use of
+# "index." as the default index name to be stripped off the end of
+# a URL (as described above).  Most sites will not need to use this
+# option, but some may, such as those whose default index file name
+# is different, or those that use "index.php" or similar URLs in a
+# dynamic environment.  Values can be 'yes' or 'no', with the default
+# being 'yes'.  This option does not effect any names added using the
+# IndexAlias option, and those names will still function as described
+# regardless of this setting.
+
+#DefaultIndex	yes
+
+# The Hide*, Group* and Ignore* and Include* keywords allow you to
+# change the way Sites, URLs, Referrers, User Agents and Usernames
+# are manipulated.  The Ignore* keywords will cause The Webalizer to
+# completely ignore records as if they didn't exist (and thus not
+# counted in the main site totals).  The Hide* keywords will prevent
+# things from being displayed in the 'Top' tables, but will still be
+# counted in the main totals.  The Group* keywords allow grouping
+# similar objects as if they were one.  Grouped records are displayed
+# in the 'Top' tables and can optionally be displayed in BOLD and/or
+# shaded. Groups cannot be hidden, and are not counted in the main
+# totals. The Group* options do not, by default, hide all the items
+# that it matches.  If you want to hide the records that match (so just
+# the grouping record is displayed), follow with an identical Hide*
+# keyword with the same value.  (see example below)  In addition,
+# Group* keywords may have an optional label which will be displayed
+# instead of the keywords value.  The label should be seperated from
+# the value by at least one 'white-space' character, such as a space
+# or tab.  If the match string contains whitespace (spaces or tabs),
+# the string should be quoted with either single or double quotes.
+#
+# The value can have either a leading or trailing '*' wildcard
+# character.  If no wildcard is found, a match can occur anywhere
+# in the string. Given a string "www.yourmama.com", the values "your",
+# "*mama.com" and "www.your*" will all match.
+
+# Your own site should be hidden
+HideSite	*gnutls.org
+#HideSite	localhost
+
+# Your own site gives most referrals
+HideReferrer	gnutls.org/
+HideReferrer	gnu.org/software/gnutls/
+
+# This one hides non-referrers ("-" Direct requests)
+#HideReferrer	Direct Request
+
+# Usually you want to hide these
+HideURL		*.gif
+HideURL		*.GIF
+HideURL		*.jpg
+HideURL		*.JPG
+HideURL		*.png
+HideURL		*.PNG
+HideURL		*.ra
+
+# Hiding agents is kind of futile
+HideAgent	msnbot
+HideAgent       ia_archiver 
+
+# You can also hide based on authenticated username
+#HideUser	root
+#HideUser	admin
+
+# Grouping options
+
+#GroupURL	/css/*	CSS
+HideURL	/css/*
+#GroupURL	/graphics/*	Images
+HideURL	/graphics/*
+GroupURL	/manual/html_node/*	HTML Manual
+HideURL	/manual/html_node/*
+GroupURL	/reference/*	Reference Manual
+HideURL	/reference/*
+
+#GroupSite	*.aol.com
+#GroupSite	*.compuserve.com
+
+GroupReferrer	yahoo.com/	Yahoo!
+GroupReferrer	excite.com/     Excite
+GroupReferrer	infoseek.com/   InfoSeek
+GroupReferrer	webcrawler.com/ WebCrawler
+GroupReferrer	serverfault.com	serverfault.com
+
+#GroupUser      root            Admin users
+#GroupUser      admin           Admin users
+#GroupUser      wheel           Admin users
+
+# The following is a great way to get an overall total
+# for browsers, and not display all the detail records.
+# (You should use MangleAgent to refine further...)
+
+GroupAgent	Opera/		Opera
+HideAgent	Opera/
+GroupAgent	"MSIE 9"	Microsoft Internet Explorer 9
+HideAgent	MSIE 9
+GroupAgent	"MSIE 8"	Microsoft Internet Explorer 8
+HideAgent	MSIE 8
+GroupAgent	"MSIE 7"	Microsoft Internet Explorer 7
+HideAgent	MSIE 7
+GroupAgent	"MSIE 6"	Microsoft Internet Explorer 6
+HideAgent	MSIE 6
+GroupAgent	"MSIE "		Older Microsoft Explorers
+HideAgent	MSIE 
+GroupAgent	Firefox/	Firefox
+HideAgent	Firefox/
+GroupAgent	Gecko/	Mozilla
+HideAgent	Gecko/
+GroupAgent	Chrome/	Chrome
+HideAgent	Chrome/
+GroupAgent	Konqueror	Konqueror
+HideAgent	Konqueror
+GroupAgent	Safari		Safari
+HideAgent	Safari
+GroupAgent	AppleWebKit	Safari
+HideAgent	AppleWebKit
+GroupAgent	Lynx*		Lynx
+HideAgent	Lynx*
+GroupAgent	Wget/		WGet
+HideAgent	Wget/
+GroupAgent	(compatible;	Other Mozilla Compatibles
+HideAgent	(compatible;
+#GroupAgent	Mozilla*	Mozilla/Netscape
+#HideAgent	Mozilla*
+
+# HideAllSites allows forcing individual sites to be hidden in the
+# report.  This is particularly useful when used in conjunction
+# with the "GroupDomain" feature, but could be useful in other
+# situations as well, such as when you only want to display grouped
+# sites (with the GroupSite keywords...).  The value for this
+# keyword can be either 'yes' or 'no', with 'no' the default,
+# allowing individual sites to be displayed.
+
+HideAllSites	yes
+
+# HideInOutVolume allows forcing the input/output volume to be hidden in
+# the report.  This is particularly useful if you don't have the Apache
+# module "mod_logio" enabled on your webserver.  The value for this
+# keyword can be either 'yes' or 'no', with 'no' the default, allowing
+# the input/output volume to be displayed.
+
+#HideInOutVolume no
+
+# The GroupDomains keyword allows you to group individual hostnames
+# into their respective domains.  The value specifies the level of
+# grouping to perform, and can be thought of as 'the number of dots'
+# that will be displayed.  For example, if a visiting host is named
+# cust1.tnt.mia.uu.net, a domain grouping of 1 will result in just
+# "uu.net" being displayed, while a 2 will result in "mia.uu.net".
+# The default value of zero disable this feature.  Domains will only
+# be grouped if they do not match any existing "GroupSite" records,
+# which allows overriding this feature with your own if desired.
+
+#GroupDomains	0
+
+# The GroupShading allows grouped rows to be shaded in the report.
+# Useful if you have lots of groups and individual records that
+# intermingle in the report, and you want to diferentiate the group
+# records a little more.  Value can be 'yes' or 'no', with 'yes'
+# being the default.
+
+#GroupShading	yes
+
+# GroupHighlight allows the group record to be displayed in BOLD.
+# Can be either 'yes' or 'no' with the default 'yes'.
+
+#GroupHighlight	yes
+
+# The Ignore* keywords allow you to completely ignore log records based
+# on hostname, URL, user agent, referrer or username.  I hesitated in
+# adding these, since the Webalizer was designed to generate _accurate_
+# statistics about a web servers performance.  By choosing to ignore
+# records, the accuracy of reports become skewed, negating why I wrote
+# this program in the first place.  However, due to popular demand, here
+# they are.  Use the same as the Hide* keywords, where the value can have
+# a leading or trailing wildcard '*'.  Use at your own risk ;)  Please
+# remember, the use of these will MAKE YOUR STATS INACCURATE and you
+# should consider using an equivalent 'Hide*' keyword instead.
+
+#IgnoreSite	localhost
+#IgnoreSite	bad.site.net
+#IgnoreURL	/test*
+#IgnoreReferrer	localhost
+#IgnoreReferrer	file:/*
+#IgnoreAgent	RealPlayer
+#IgnoreUser     root
+
+# The Include* keywords allow you to force the inclusion of log records
+# based on hostname, URL, user agent, referrer or username.  They take
+# precidence over the Ignore* keywords.  Note: Using Ignore/Include
+# combinations to selectivly process parts of a web site is _extremely
+# inefficent_!!! Avoid doing so if possible (ie: grep the records to a
+# seperate file if you really want that kind of report).
+
+# Example: Only show stats on Joe User's pages...
+#IgnoreURL	*
+#IncludeURL	~joeuser*
+
+# Or based on an authenticated username
+#IgnoreUser     *
+#IncludeUser    someuser
+
+# The MangleAgents allows you to specify how much, if any, The Webalizer
+# should mangle user agent names.  This allows several levels of detail
+# to be produced when reporting user agent statistics.  There are six
+# levels that can be specified, which define different levels of detail
+# supression.  Level 5 shows only the browser name (MSIE or Mozilla)
+# and the major version number.  Level 4 adds the minor version number
+# (single decimal place).  Level 3 displays the minor version to two
+# decimal places.  Level 2 will add any sub-level designation (such
+# as Mozilla/3.01Gold or MSIE 3.0b).  Level 1 will attempt to also add
+# the system type if it is specified.  The default Level 0 displays the
+# full user agent field without modification and produces the greatest
+# amount of detail.  User agent names that can't be mangled will be
+# left unmodified.
+
+#MangleAgents    0
+
+# The SearchEngine keywords allow specification of search engines and
+# their query strings on the URL.  These are used to locate and report
+# what search strings are used to find your site.  The first word is
+# a substring to match in the referrer field that identifies the search
+# engine, and the second is the URL variable used by that search engine
+# to define it's search terms.
+
+SearchEngine	.google.   	q=
+SearchEngine	yahoo.com	p=
+SearchEngine	altavista.com	q=
+SearchEngine   aolsearch.      query=
+SearchEngine   ask.co          q=
+SearchEngine	eureka.com	q=
+SearchEngine	lycos.com	query=
+SearchEngine	hotbot.com	MT=
+SearchEngine	msn.com		q=
+SearchEngine	infoseek.com	qt=
+SearchEngine	excite		search=
+SearchEngine	netscape.com	query=
+SearchEngine	mamma.com	query=
+SearchEngine	alltheweb.com	q=
+SearchEngine	northernlight.com  qr=
+
+# Normally, search strings are converted to lower case in order to
+# increase accuracy.  The SearchCaseI option allows them to maintain
+# case sensitivity, useful for some sites.  The value can be 'yes'
+# or 'no', with 'yes' (case insensitive) being the default.
+
+#SearchCaseI	yes
+
+# The Dump* keywords allow the dumping of Sites, URLs, Referrers
+# User Agents, Usernames and Search strings to seperate tab delimited
+# text files, suitable for import into most database or spreadsheet
+# programs.
+
+# DumpPath specifies the path to dump the files.  If not specified,
+# it will default to the current output directory.  Do not use a
+# trailing slash ('/').
+
+#DumpPath	/var/lib/httpd/logs
+
+# The DumpHeader keyword specifies if a header record should be
+# written to the file.  A header record is the first record of the
+# file, and contains the labels for each field written.  Normally,
+# files that are intended to be imported into a database system
+# will not need a header record, while spreadsheets usually do.
+# Value can be either 'yes' or 'no', with 'no' being the default.
+
+#DumpHeader	no
+
+# DumpExtension allow you to specify the dump filename extension
+# to use.  The default is "tab", but some programs are pickey about
+# the filenames they use, so you may change it here (for example,
+# some people may prefer to use "csv").
+
+#DumpExtension	tab
+
+# These control the dumping of each individual table.  The value
+# can be either 'yes' or 'no'.. the default is 'no'.
+
+#DumpSites	no
+#DumpURLs	no
+#DumpReferrers	no
+#DumpAgents	no
+#DumpUsers	no
+#DumpSearchStr  no
+
+# CodeNotFound defines the number of entries in the HTTP-Code 404 error table.
+# The default value for CodeNotFound is 20. The code 404 error table may be
+# disabled by using zero (0) for the value. Using one (1) for the value
+# shows all code 404 errors in the table.
+
+#CodeNotFound    20
+
+# The custom page colors are defined here. Declare them in the standard
+# hexadecimal way (as HTML, without the '#'). If none are given, you will get
+# the standard webalizer colors.
+
+#ColorBackground        e8e8e8
+#ColorText              000000
+#ColorLink              0000ff
+#ColorVLink             ff0000
+#ColorALink             ff0000
+
+# The custom graph colors are defined here. Declare them 
+# in the standard hexadecimal way (as HTML, without the '#') 
+# If none are given, you will get the standard default colors.
+
+#ColorGrp               d0d0e0
+#ColorHeadline          c0c0c0
+#ColorCounter           c0c0c0
+#ColorHit	        00805c
+#ColorFile	        0040ff
+#ColorSite	        ff8000
+#ColorKbyte	        ff0000
+#ColorPage	        00e0ff
+#ColorVisit	        ffff00
+#ColorMisc              00e0ff
+
+#ChartBackgroundColor	c0c0c0
+#ChartLegendColor	000000
+#ChartShadowColor1      ffffff
+#ChartShadowColor2      808080
+
+#PieColor1	        800080
+#PieColor2	        80ffc0
+#PieColor3	        ff00ff
+#PieColor4	        ffc080
+
+# TableBorder defines the border size of all tables.
+# The default value for TableBorder is 1. The table border may be disabled by
+# using zero (0) for the value. Only values between 0 and 5 are allowed.
+
+#TableBorder     1
+
+# ChartBorder defines the border size of all charts.
+# The default value for ChartBorder is 1. The chart border may be disabled by
+# using zero (0) for the value. Only values between 0 and 5 are allowed.
+
+#ChartBorder     1
+
+# ArchiveReports defines (additionally to at most 120 index months) the number
+# of archived reports (months) that will be shown as links in a separate table on
+# the summary page. If a month's report is not available no link will be shown.
+# The default number is 0 (disabled). The maximum number is 480 (40 years).
+
+#ArchiveReports  0
+
+# End of configuration file...  Have a nice day!
author	Nikos Mavrogiannopoulos <nmav@gnutls.org>	2012-12-05 23:45:03 +0100
committer	Nikos Mavrogiannopoulos <nmav@gnutls.org>	2012-12-05 23:45:03 +0100
commit	fb5811f60e0716132db26c35cdb9f21af79ed9ff (patch)
tree	4bc6a962d649ea97171ccd14d2eb114036e95b6d
parent	b33145bc8d4ae5fc41a321d8c8178ea368db3fbc (diff)
download	gnutls-fb5811f60e0716132db26c35cdb9f21af79ed9ff.tar.gz