summary | refs | log | tree | commit | diff
path: root/src/examples/htmlStripper.py
diff options
context:
space:
mode:
author Cengiz Kaygusuz <cngkaygusuz@gmail.com> 2017-11-20 20:46:39 -0500
committer Cengiz Kaygusuz <cngkaygusuz@gmail.com> 2017-11-20 20:46:39 -0500
commit 27e183a78c8062ed7c2bbb91655a5e56cd697bba (patch)
tree 88fd355a0cc6da4c130582e092d702836596cbb2 /src/examples/htmlStripper.py
parent 4ba589cf13588e90992e23deb5a9784340efd2cc (diff)
download pyparsing-git-27e183a78c8062ed7c2bbb91655a5e56cd697bba.tar.gz
Move src to root
Diffstat (limited to 'src/examples/htmlStripper.py')
-rw-r--r-- src/examples/htmlStripper.py 32
1 files changed, 0 insertions, 32 deletions
diff --git a/src/examples/htmlStripper.py b/src/examples/htmlStripper.py
deleted file mode 100644
index 1d7a0f0..0000000
--- a/src/examples/htmlStripper.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# htmlStripper.py
-#
-# Sample code for stripping HTML markup tags and scripts from
-# HTML source files.
-#
-# Copyright (c) 2006, 2016, Paul McGuire
-#
-from contextlib import closing
-import urllib.request, urllib.parse, urllib.error
-from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
- htmlComment, anyOpenTag, anyCloseTag, LineEnd, OneOrMore, replaceWith)
-
-scriptOpen,scriptClose = makeHTMLTags("script")
-scriptBody = scriptOpen + SkipTo(scriptClose) + scriptClose
-commonHTMLEntity.setParseAction(replaceHTMLEntity)
-
-# get some HTML
-targetURL = "http://wiki.python.org/moin/PythonDecoratorLibrary"
-with closing(urllib.request.urlopen( targetURL )) as targetPage:
- targetHTML = targetPage.read().decode("UTF-8")
-
-# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
- anyOpenTag | anyCloseTag ).suppress().transformString(targetHTML)
-
-# first pass leaves many blank lines, collapse these down
-repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
-repeatedNewlines.setParseAction(replaceWith("\n\n"))
-secondPass = repeatedNewlines.transformString(firstPass)
-
-print(secondPass)
\ No newline at end of file