summary | refs | log | tree | commit | diff
path: root/examples/htmlStripper.py
diff options
context:
space:
mode:
author: Jon Dufresne <jon.dufresne@gmail.com> 2019-10-31 21:10:28 -0700
committer: Paul McGuire <ptmcg@users.noreply.github.com> 2019-10-31 23:10:28 -0500
commit: 53d1b4a6f48a53c4c4ec4ac7031362b691c0366d (patch)
tree: 088ad3cf3561b78a00af4fb2fd474f4a2b8ca70c /examples/htmlStripper.py
parent: 41752aa52cc97c710474bb2972cceab057b52ad4 (diff)
download: pyparsing-git-53d1b4a6f48a53c4c4ec4ac7031362b691c0366d.tar.gz
Blacken the project (#141)
Diffstat (limited to 'examples/htmlStripper.py')
-rw-r--r-- examples/htmlStripper.py 23
1 files changed, 17 insertions, 6 deletions
diff --git a/examples/htmlStripper.py b/examples/htmlStripper.py
index bd99b77..6a209fa 100644
--- a/examples/htmlStripper.py
+++ b/examples/htmlStripper.py
@@ -7,8 +7,16 @@
# Copyright (c) 2006, 2016, Paul McGuire
#
from urllib.request import urlopen
-from pyparsing import (makeHTMLTags, commonHTMLEntity, replaceHTMLEntity,
- htmlComment, anyOpenTag, anyCloseTag, LineEnd, replaceWith)
+from pyparsing import (
+    makeHTMLTags,
+    commonHTMLEntity,
+    replaceHTMLEntity,
+    htmlComment,
+    anyOpenTag,
+    anyCloseTag,
+    LineEnd,
+    replaceWith,
+)
scriptOpen, scriptClose = makeHTMLTags("script")
scriptBody = scriptOpen + scriptOpen.tag_body + scriptClose
@@ -16,15 +24,18 @@ commonHTMLEntity.setParseAction(replaceHTMLEntity)
# get some HTML
targetURL = "https://wiki.python.org/moin/PythonDecoratorLibrary"
-with urlopen( targetURL ) as targetPage:
+with urlopen(targetURL) as targetPage:
targetHTML = targetPage.read().decode("UTF-8")
# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
- anyOpenTag | anyCloseTag ).suppress().transformString(targetHTML)
+firstPass = (
+    (htmlComment | scriptBody | commonHTMLEntity | anyOpenTag | anyCloseTag)
+    .suppress()
+    .transformString(targetHTML)
+)
# first pass leaves many blank lines, collapse these down
-repeatedNewlines = LineEnd()*(2,)
+repeatedNewlines = LineEnd() * (2,)
repeatedNewlines.setParseAction(replaceWith("\n\n"))
secondPass = repeatedNewlines.transformString(firstPass)