题目:给定一个 HTML 文件,找出其中所有的链接(即 <a> 标签的 href 属性值)。
代码:
from html.parser import HTMLParser
import urllib.request


class myhtml(HTMLParser):
    """HTML parser that collects the ``href`` value of every ``<a>`` tag.

    After calling :meth:`feed`, the collected targets are available in
    ``self.links`` in document order.
    """

    def __init__(self):
        super().__init__()
        # Not used by the parser itself; kept so that existing code reading
        # this attribute keeps working.
        self.flag = 0
        # href values of the anchors seen so far, in document order.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` attribute(s) of an ``<a>`` start tag.

        Args:
            tag: Tag name as passed in by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None``
                for an attribute written without a value (``<a href>``).
        """
        if tag != "a":
            return
        for name, value in attrs:
            # A valueless ``href`` carries no target, so it is not a link.
            if name == "href" and value is not None:
                self.links.append(value)


def main():
    """Download the demo page and print every link found in it."""
    myurl = "https://www.cnblogs.com/pinpin"
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(myurl) as response:
        raw = response.read()
        # Prefer the charset declared in the Content-Type header; fall back
        # to UTF-8 when the server does not declare one.
        charset = response.headers.get_content_charset() or "utf-8"
    # Undecodable bytes are replaced rather than aborting the extraction.
    html_connect = raw.decode(charset, errors="replace")

    parser = myhtml()
    parser.feed(html_connect)
    print(parser.links)


if __name__ == "__main__":
    main()