summary | refs | log | tree | commit | diff
path: root/examples/htmlStripper.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/htmlStripper.py')
-rw-r--r-- examples/htmlStripper.py | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/examples/htmlStripper.py b/examples/htmlStripper.py
index 1d7a0f0..6fc4aef 100644
--- a/examples/htmlStripper.py
+++ b/examples/htmlStripper.py
@@ -1,14 +1,14 @@
#
# htmlStripper.py
#
-# Sample code for stripping HTML markup tags and scripts from
+# Sample code for stripping HTML markup tags and scripts from
# HTML source files.
#
# Copyright (c) 2006, 2016, Paul McGuire
#
from contextlib import closing
import urllib.request, urllib.parse, urllib.error
-from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
+from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
htmlComment, anyOpenTag, anyCloseTag, LineEnd, OneOrMore, replaceWith)
scriptOpen,scriptClose = makeHTMLTags("script")
@@ -21,7 +21,7 @@ with closing(urllib.request.urlopen( targetURL )) as targetPage:
targetHTML = targetPage.read().decode("UTF-8")
# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
+firstPass = (htmlComment | scriptBody | commonHTMLEntity |
anyOpenTag | anyCloseTag ).suppress().transformString(targetHTML)
# first pass leaves many blank lines, collapse these down
@@ -29,4 +29,4 @@ repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
repeatedNewlines.setParseAction(replaceWith("\n\n"))
secondPass = repeatedNewlines.transformString(firstPass)
-print(secondPass)
\ No newline at end of file
+print(secondPass)