diff options
author | Paul McGuire <ptmcg@users.noreply.github.com> | 2018-12-23 21:30:40 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-12-23 21:30:40 -0600 |
commit | 4fba64a079016e6ea62d041f19b7eadd081341e8 (patch) | |
tree | 6c5fdae41cf8b335ff1c64f37856786523e4fd0d /examples/htmlStripper.py | |
parent | 59dfd314c23fd653271bdad37631f0497e8ad748 (diff) | |
parent | de8326d00dffdb500c02839a98330b869c2457f3 (diff) | |
download | pyparsing-git-4fba64a079016e6ea62d041f19b7eadd081341e8.tar.gz |
Merge pull request #55 from jdufresne/ws
Trim trailing white space throughout the project
Diffstat (limited to 'examples/htmlStripper.py')
-rw-r--r-- | examples/htmlStripper.py | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/examples/htmlStripper.py b/examples/htmlStripper.py
index 1d7a0f0..6fc4aef 100644
--- a/examples/htmlStripper.py
+++ b/examples/htmlStripper.py
@@ -1,14 +1,14 @@
#
# htmlStripper.py
#
-# Sample code for stripping HTML markup tags and scripts from
+# Sample code for stripping HTML markup tags and scripts from
# HTML source files.
#
# Copyright (c) 2006, 2016, Paul McGuire
#
from contextlib import closing
import urllib.request, urllib.parse, urllib.error
-from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
+from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
htmlComment, anyOpenTag, anyCloseTag, LineEnd, OneOrMore, replaceWith)
scriptOpen,scriptClose = makeHTMLTags("script")
@@ -21,7 +21,7 @@ with closing(urllib.request.urlopen( targetURL )) as targetPage:
targetHTML = targetPage.read().decode("UTF-8")
# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
+firstPass = (htmlComment | scriptBody | commonHTMLEntity |
anyOpenTag | anyCloseTag ).suppress().transformString(targetHTML)
# first pass leaves many blank lines, collapse these down
@@ -29,4 +29,4 @@ repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
repeatedNewlines.setParseAction(replaceWith("\n\n"))
secondPass = repeatedNewlines.transformString(firstPass)
-print(secondPass)
\ No newline at end of file
+print(secondPass)
|