summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2016-06-02 12:37:27 +0000
committerptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2016-06-02 12:37:27 +0000
commitd66cd522c20810bf24f73af0adece6c86d307699 (patch)
tree52f15b1dbd8cceb1bf508fc333956c60b37c863a
parentce01d6e59b795879dfabc3433b4507cde1e58b71 (diff)
downloadpyparsing-d66cd522c20810bf24f73af0adece6c86d307699.tar.gz
Added pyparsing_common.stripHTMLTags; added links to pyparsing_common docstring
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@360 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r--src/CHANGES3
-rw-r--r--src/pyparsing.py22
2 files changed, 17 insertions, 8 deletions
diff --git a/src/CHANGES b/src/CHANGES
index a24709f..3b33ed9 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -2,7 +2,7 @@
Change Log
==========
-Verison 2.1.5 -
+Version 2.1.5 - June, 2016
------------------------------
- Added a new parse action construction helper tokenMap, which will
apply a function and optional arguments to each element in a
@@ -36,6 +36,7 @@ Verison 2.1.5 -
. ISO8601 date and date time strings
. UUID (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
. hex integer (returned as int)
+ . stripHTMLTags (parse action to remove tags from HTML source)
- runTests now returns a two-tuple: success if all tests succeed,
and an output list of each test and its output lines.
diff --git a/src/pyparsing.py b/src/pyparsing.py
index a9edfcb..e68b78c 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "2.1.5"
-__versionTime__ = "24 May 2016 04:18 UTC"
+__versionTime__ = "02 Jun 2016 12:25 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -3940,12 +3940,15 @@ commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepite
class pyparsing_common:
"""
Here are some common low-level expressions that may be useful in jump-starting parser development:
- - numeric forms (integers, reals, scientific notation)
- - parse actions for converting numeric strings to Python int and/or float types
- - common programming identifiers
- - network addresses (MAC, IPv4, IPv6)
- - ISO8601 dates and datetimes
- - UUID
+ - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
+ - common L{programming identifiers<identifier>}
+ - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
+ - ISO8601 L{dates<iso8601_date>} and L{datetimes<iso8601_datetime>}
+ - L{UUID<uuid>}
+ Parse actions:
+ - C{L{convertToInteger}}
+ - C{L{convertToFloat}}
+ - C{L{stripHTMLTags}}
"""
convertToInteger = tokenMap(int)
@@ -4005,6 +4008,16 @@ class pyparsing_common:
uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
+
+    # Class-level helper expression: matches any opening or closing tag and suppresses it.
+    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
+    @staticmethod
+    def stripHTMLTags(s,l,tokens):
+        """Parse action to remove HTML tags from web page HTML source.
+        Returns C{tokens[0]} after transforming it with the tag-suppressing expression."""
+        # Names bound in a class body are not visible inside functions defined in
+        # that body, so the stripper has to be looked up through the class itself.
+        return pyparsing_common._html_stripper.transformString(tokens[0])
if __name__ == "__main__":