summaryrefslogtreecommitdiff
path: root/src/pyparsing.py
diff options
context:
space:
mode:
authorPaul McGuire <ptmcg@austin.rr.com>2016-06-02 12:37:27 +0000
committerPaul McGuire <ptmcg@austin.rr.com>2016-06-02 12:37:27 +0000
commit8ed2a4b42750cb599e380a5c64b9650412363ae9 (patch)
tree52f15b1dbd8cceb1bf508fc333956c60b37c863a /src/pyparsing.py
parent443cd3d0527815d80f22466be3503d58238649e2 (diff)
downloadpyparsing-git-8ed2a4b42750cb599e380a5c64b9650412363ae9.tar.gz
Added pyparsing_common.stripHTMLTags; added links to pyparsing_common docstring
Diffstat (limited to 'src/pyparsing.py')
-rw-r--r--src/pyparsing.py22
1 files changed, 15 insertions, 7 deletions
diff --git a/src/pyparsing.py b/src/pyparsing.py
index a9edfcb..e68b78c 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "2.1.5"
-__versionTime__ = "24 May 2016 04:18 UTC"
+__versionTime__ = "02 Jun 2016 12:25 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -3940,12 +3940,15 @@ commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepite
class pyparsing_common:
"""
Here are some common low-level expressions that may be useful in jump-starting parser development:
- - numeric forms (integers, reals, scientific notation)
- - parse actions for converting numeric strings to Python int and/or float types
- - common programming identifiers
- - network addresses (MAC, IPv4, IPv6)
- - ISO8601 dates and datetimes
- - UUID
+ - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
+ - common L{programming identifiers<identifier>}
+ - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
+ - ISO8601 L{dates<iso8601_date>} and L{datetimes<iso8601_datetime>}
+ - L{UUID<uuid>}
+ Parse actions:
+ - C{L{convertToInteger}}
+ - C{L{convertToFloat}}
+ - C{L{stripHTMLTags}}
"""
convertToInteger = tokenMap(int)
@@ -4005,6 +4008,11 @@ class pyparsing_common:
uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
+
+ _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
+ @staticmethod
+ def stripHTMLTags(s, l, tokens):
+     """
+     Parse action to remove HTML tags from web page HTML source.
+
+     Runs C{tokens[0]} through C{_html_stripper.transformString}, where
+     C{_html_stripper} suppresses anything matched by C{anyOpenTag} or
+     C{anyCloseTag}, and returns the resulting string.
+     """
+     # _html_stripper is a class attribute: names bound in a class body are
+     # not in scope inside a function defined in that body, so it has to be
+     # reached through the class. @staticmethod keeps this usable as
+     # pyparsing_common.stripHTMLTags on Python 2 too, where a plain function
+     # fetched from a class is an unbound method.
+     return pyparsing_common._html_stripper.transformString(tokens[0])
if __name__ == "__main__":