summary | refs | log | tree | commit | diff
path: root/examples/httpServerLogParser.py
diff options
context:
space:
mode:
author: Jon Dufresne <jon.dufresne@gmail.com> 2018-12-22 09:28:48 -0800
committer: Jon Dufresne <jon.dufresne@gmail.com> 2018-12-22 13:46:56 -0800
commit: de8326d00dffdb500c02839a98330b869c2457f3 (patch)
tree: 6c5fdae41cf8b335ff1c64f37856786523e4fd0d /examples/httpServerLogParser.py
parent: 59dfd314c23fd653271bdad37631f0497e8ad748 (diff)
download: pyparsing-git-de8326d00dffdb500c02839a98330b869c2457f3.tar.gz
Trim trailing white space throughout the project
Many editors clean up trailing white space on save. By removing it all in one go, it helps keep future diffs cleaner by avoiding spurious white space changes on unrelated lines.
Diffstat (limited to 'examples/httpServerLogParser.py')
-rw-r--r-- examples/httpServerLogParser.py 28
1 files changed, 14 insertions, 14 deletions
diff --git a/examples/httpServerLogParser.py b/examples/httpServerLogParser.py
index a147a05..261cea3 100644
--- a/examples/httpServerLogParser.py
+++ b/examples/httpServerLogParser.py
@@ -5,11 +5,11 @@
"""
Parser for HTTP server log output, of the form:
-195.146.134.15 - - [20/Jan/2003:08:55:36 -0800]
-"GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html"
+195.146.134.15 - - [20/Jan/2003:08:55:36 -0800]
+"GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html"
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
-127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300]
-"GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css"
+127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300]
+"GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css"
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6"
You can then break it up as follows:
@@ -32,30 +32,30 @@ def getCmdFields( s, l, t ):
logLineBNF = None
def getLogLineBNF():
global logLineBNF
-
+
if logLineBNF is None:
integer = Word( nums )
ipAddress = delimitedList( integer, ".", combine=True )
-
+
timeZoneOffset = Word("+-",nums)
month = Word(string.uppercase, string.lowercase, exact=3)
- serverDateTime = Group( Suppress("[") +
+ serverDateTime = Group( Suppress("[") +
Combine( integer + "/" + month + "/" + integer +
":" + integer + ":" + integer + ":" + integer ) +
- timeZoneOffset +
+ timeZoneOffset +
Suppress("]") )
-
- logLineBNF = ( ipAddress.setResultsName("ipAddr") +
+
+ logLineBNF = ( ipAddress.setResultsName("ipAddr") +
Suppress("-") +
("-" | Word( alphas+nums+"@._" )).setResultsName("auth") +
- serverDateTime.setResultsName("timestamp") +
+ serverDateTime.setResultsName("timestamp") +
dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) +
- (integer | "-").setResultsName("statusCode") +
- (integer | "-").setResultsName("numBytesSent") +
+ (integer | "-").setResultsName("statusCode") +
+ (integer | "-").setResultsName("numBytesSent") +
dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) +
dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) )
return logLineBNF
-
+
testdata = """
195.146.134.15 - - [20/Jan/2003:08:55:36 -0800] "GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
111.111.111.11 - - [16/Feb/2004:04:09:49 -0800] "GET /ads/redirectads/336x280redirect.htm HTTP/1.1" 304 - "http://www.foobarp.org/theme_detail.php?type=vs&cat=0&mid=27512" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"