Added pyparsing_common.stripHTMLTags; added links to pyparsing_common docstring

git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@360 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-06-02 12:37:27 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-06-02 12:37:27 +0000
commit: d66cd522c20810bf24f73af0adece6c86d307699 (patch)
tree: 52f15b1dbd8cceb1bf508fc333956c60b37c863a
parent: ce01d6e59b795879dfabc3433b4507cde1e58b71 (diff)
download: pyparsing-d66cd522c20810bf24f73af0adece6c86d307699.tar.gz
2 files changed, 17 insertions, 8 deletions
diff --git a/src/CHANGES b/src/CHANGES
index a24709f..3b33ed9 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -2,7 +2,7 @@
 Change Log
 ==========
 
-Verison 2.1.5 - 
+Version 2.1.5 - June, 2016
 ------------------------------
 - Added a new parse action construction helper tokenMap, which will
   apply a function and optional arguments to each element in a 
@@ -36,6 +36,7 @@ Verison 2.1.5 -
   . ISO8601 date and date time strings
   . UUID (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
   . hex integer (returned as int)
+  . stripHTMLTags (parse action to remove tags from HTML source)
 
 - runTests now returns a two-tuple: success if all tests succeed,
   and an output list of each test and its output lines.
diff --git a/src/pyparsing.py b/src/pyparsing.py
index a9edfcb..e68b78c 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
 """
 
 __version__ = "2.1.5"
-__versionTime__ = "24 May 2016 04:18 UTC"
+__versionTime__ = "02 Jun 2016 12:25 UTC"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
@@ -3940,12 +3940,15 @@ commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepite
 class pyparsing_common:
     """
     Here are some common low-level expressions that may be useful in jump-starting parser development:
-     - numeric forms (integers, reals, scientific notation)
-     - parse actions for converting numeric strings to Python int and/or float types
-     - common programming identifiers
-     - network addresses (MAC, IPv4, IPv6)
-     - ISO8601 dates and datetimes
-     - UUID
+     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
+     - common L{programming identifiers<identifier>}
+     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
+     - ISO8601 L{dates<iso8601_date>} and L{datetimes<iso8601_datetime>}
+     - L{UUID<uuid>}
+    Parse actions:
+     - C{L{convertToInteger}}
+     - C{L{convertToFloat}}
+     - C{L{stripHTMLTags}}
     """
 
     convertToInteger = tokenMap(int)
@@ -4005,6 +4008,21 @@ class pyparsing_common:
 
     uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
     "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
+
+    # Suppressed open/close tag matcher; kept private because it only backs
+    # the stripHTMLTags parse action below.
+    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
+    @staticmethod
+    def stripHTMLTags(s, l, tokens):
+        """
+        Parse action to remove HTML tags from web page HTML source.
+
+        Runs C{transformString} of the tag-suppressing expression over
+        C{tokens[0]} and returns the result.
+        """
+        # A class attribute is not visible as a bare name inside a function
+        # body, so it has to be reached through the class to avoid a NameError.
+        return pyparsing_common._html_stripper.transformString(tokens[0])
 
 if __name__ == "__main__":
author	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2016-06-02 12:37:27 +0000
committer	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2016-06-02 12:37:27 +0000
commit	d66cd522c20810bf24f73af0adece6c86d307699 (patch)
tree	52f15b1dbd8cceb1bf508fc333956c60b37c863a
parent	ce01d6e59b795879dfabc3433b4507cde1e58b71 (diff)
download	pyparsing-d66cd522c20810bf24f73af0adece6c86d307699.tar.gz