diff options
author | Cengiz Kaygusuz <cngkaygusuz@gmail.com> | 2017-11-20 20:46:39 -0500 |
---|---|---|
committer | Cengiz Kaygusuz <cngkaygusuz@gmail.com> | 2017-11-20 20:46:39 -0500 |
commit | 27e183a78c8062ed7c2bbb91655a5e56cd697bba (patch) | |
tree | 88fd355a0cc6da4c130582e092d702836596cbb2 /examples/scanYahoo.py | |
parent | 4ba589cf13588e90992e23deb5a9784340efd2cc (diff) | |
download | pyparsing-git-27e183a78c8062ed7c2bbb91655a5e56cd697bba.tar.gz |
Move src to root
Diffstat (limited to 'examples/scanYahoo.py')
-rw-r--r-- | examples/scanYahoo.py | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/examples/scanYahoo.py b/examples/scanYahoo.py
new file mode 100644
index 0000000..825c169
--- /dev/null
+++ b/examples/scanYahoo.py
@@ -0,0 +1,14 @@
+from pyparsing import makeHTMLTags,SkipTo,htmlComment
+import urllib.request, urllib.parse, urllib.error
+
+# urlopen() yields bytes on Python 3; decode to str so pyparsing scans real text.
+with urllib.request.urlopen("http://www.yahoo.com") as serverListPage:
+    charset = serverListPage.headers.get_content_charset() or "utf-8"
+    htmlText = serverListPage.read().decode(charset, errors="replace")
+
+aStart, aEnd = makeHTMLTags("A")
+link = aStart + SkipTo(aEnd).setResultsName("link") + aEnd  # <A ...>link</A>
+link.ignore(htmlComment)  # skip over HTML comments while scanning
+
+for toks, _start, _end in link.scanString(htmlText):
+    print(toks.link, "->", toks.startA.href)
\ No newline at end of file |