diff options
author | Cengiz Kaygusuz <cngkaygusuz@gmail.com> | 2017-11-20 20:46:39 -0500 |
---|---|---|
committer | Cengiz Kaygusuz <cngkaygusuz@gmail.com> | 2017-11-20 20:46:39 -0500 |
commit | 27e183a78c8062ed7c2bbb91655a5e56cd697bba (patch) | |
tree | 88fd355a0cc6da4c130582e092d702836596cbb2 /src/examples/htmlStripper.py | |
parent | 4ba589cf13588e90992e23deb5a9784340efd2cc (diff) | |
download | pyparsing-git-27e183a78c8062ed7c2bbb91655a5e56cd697bba.tar.gz |
Move src to root
Diffstat (limited to 'src/examples/htmlStripper.py')
-rw-r--r-- | src/examples/htmlStripper.py | 32 |
1 files changed, 0 insertions, 32 deletions
diff --git a/src/examples/htmlStripper.py b/src/examples/htmlStripper.py deleted file mode 100644 index 1d7a0f0..0000000 --- a/src/examples/htmlStripper.py +++ /dev/null @@ -1,32 +0,0 @@ -#
-# htmlStripper.py
-#
-# Sample code for stripping HTML markup tags and scripts from
-# HTML source files.
-#
-# Copyright (c) 2006, 2016, Paul McGuire
-#
-from contextlib import closing
-import urllib.request, urllib.parse, urllib.error
-from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
- htmlComment, anyOpenTag, anyCloseTag, LineEnd, OneOrMore, replaceWith)
-
-scriptOpen,scriptClose = makeHTMLTags("script")
-scriptBody = scriptOpen + SkipTo(scriptClose) + scriptClose
-commonHTMLEntity.setParseAction(replaceHTMLEntity)
-
-# get some HTML
-targetURL = "http://wiki.python.org/moin/PythonDecoratorLibrary"
-with closing(urllib.request.urlopen( targetURL )) as targetPage:
- targetHTML = targetPage.read().decode("UTF-8")
-
-# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
- anyOpenTag | anyCloseTag ).suppress().transformString(targetHTML)
-
-# first pass leaves many blank lines, collapse these down
-repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
-repeatedNewlines.setParseAction(replaceWith("\n\n"))
-secondPass = repeatedNewlines.transformString(firstPass)
-
-print(secondPass)
\ No newline at end of file |