blob: 9ecb5e9799ebea1f90d29d7c0e5c43bb157fffc0 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
from pyparsing import makeHTMLTags,SkipTo,htmlComment
import urllib.request, urllib.parse, urllib.error
# Fetch a web page and print the body text and href of every <A>...</A>
# anchor found in it.
#
# The response is used as a context manager so the connection is closed even
# if read() raises.
with urllib.request.urlopen("https://www.yahoo.com/") as serverListPage:
    # read() returns bytes under Python 3, while pyparsing scans str input.
    # Decode with the charset advertised in the response headers, falling
    # back to UTF-8; undecodable bytes are replaced rather than aborting.
    charset = serverListPage.headers.get_content_charset() or "utf-8"
    htmlText = serverListPage.read().decode(charset, errors="replace")

# Expressions for the opening and closing anchor tags.
aStart, aEnd = makeHTMLTags("A")

# A link is an opening tag, everything up to the closing tag (captured under
# the results name "link"), and the closing tag itself.
link = aStart + SkipTo(aEnd).setResultsName("link") + aEnd

# Skip over anchors that appear inside HTML comments.
link.ignore(htmlComment)

# scanString yields (tokens, start, end) for each match; only the tokens are
# needed here.
for toks, start, end in link.scanString(htmlText):
    print(toks.link, "->", toks.startA.href)
|