diff options
author | Cengiz Kaygusuz <cngkaygusuz@gmail.com> | 2017-11-20 20:46:39 -0500 |
---|---|---|
committer | Cengiz Kaygusuz <cngkaygusuz@gmail.com> | 2017-11-20 20:46:39 -0500 |
commit | 27e183a78c8062ed7c2bbb91655a5e56cd697bba (patch) | |
tree | 88fd355a0cc6da4c130582e092d702836596cbb2 /src/examples/urlExtractor.py | |
parent | 4ba589cf13588e90992e23deb5a9784340efd2cc (diff) | |
download | pyparsing-git-27e183a78c8062ed7c2bbb91655a5e56cd697bba.tar.gz |
Move src to root
Diffstat (limited to 'src/examples/urlExtractor.py')
-rw-r--r-- | src/examples/urlExtractor.py | 33 |
1 files changed, 0 insertions, 33 deletions
diff --git a/src/examples/urlExtractor.py b/src/examples/urlExtractor.py
deleted file mode 100644
index 2c66d78..0000000
--- a/src/examples/urlExtractor.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# URL extractor
-# Copyright 2004, Paul McGuire
-from pyparsing import makeHTMLTags, SkipTo, pyparsing_common
-import urllib.request
-from contextlib import closing
-import pprint
-
-linkOpenTag, linkCloseTag = makeHTMLTags('a')
-
-linkBody = SkipTo(linkCloseTag)
-linkBody.setParseAction(pyparsing_common.stripHTMLTags)
-linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split()))
-
-link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()
-
-# Go get some HTML with some links in it.
-with closing(urllib.request.urlopen("http://www.yahoo.com")) as serverListPage:
-    htmlText = serverListPage.read().decode("UTF-8")
-
-# scanString is a generator that loops through the input htmlText, and for each
-# match yields the tokens and start and end locations (for this application, we are
-# not interested in the start and end values).
-for toks,strt,end in link.scanString(htmlText):
-    print(toks.asList())
-
-# Create dictionary from list comprehension, assembled from each pair of tokens returned
-# from a matched URL.
-pprint.pprint(
-    dict((toks.body, toks.href) for toks,strt,end in link.scanString(htmlText))
-    )
-
-
-
|