diff options
Diffstat (limited to 'examples/htmlStripper.py')
-rw-r--r-- | examples/htmlStripper.py | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/examples/htmlStripper.py b/examples/htmlStripper.py
index 1d7a0f0..6fc4aef 100644
--- a/examples/htmlStripper.py
+++ b/examples/htmlStripper.py
@@ -1,14 +1,14 @@
#
# htmlStripper.py
#
-# Sample code for stripping HTML markup tags and scripts from
+# Sample code for stripping HTML markup tags and scripts from
# HTML source files.
#
# Copyright (c) 2006, 2016, Paul McGuire
#
from contextlib import closing
import urllib.request, urllib.parse, urllib.error
-from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
+from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
htmlComment, anyOpenTag, anyCloseTag, LineEnd, OneOrMore, replaceWith)
scriptOpen,scriptClose = makeHTMLTags("script")
@@ -21,7 +21,7 @@ with closing(urllib.request.urlopen( targetURL )) as targetPage:
targetHTML = targetPage.read().decode("UTF-8")
# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
+firstPass = (htmlComment | scriptBody | commonHTMLEntity |
anyOpenTag | anyCloseTag ).suppress().transformString(targetHTML)
# first pass leaves many blank lines, collapse these down
@@ -29,4 +29,4 @@ repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
repeatedNewlines.setParseAction(replaceWith("\n\n"))
secondPass = repeatedNewlines.transformString(firstPass)
-print(secondPass)
\ No newline at end of file
+print(secondPass)
|