"""Usage notes for ``html.parser.HTMLParser``.

Defines a small ``HTMLParser`` subclass that prints every parsing event it
receives, then runs it on a short HTML snippet as a demonstration.
"""
from html.parser import HTMLParser


class MyHtmlParser(HTMLParser):
    """HTMLParser subclass that echoes each parsing event to stdout.

    Every handler first delegates to the base-class implementation and then
    prints a one-line description of the event.
    """

    def handle_decl(self, decl):
        """Report a declaration; called for input such as ``<!DOCTYPE html>``."""
        super().handle_decl(decl)
        print('decl {}'.format(decl))

    def handle_starttag(self, tag, attrs):
        """Report an opening tag (attributes are received but not printed)."""
        super().handle_starttag(tag, attrs)
        print("start tag is <{}>".format(tag))

    def handle_endtag(self, tag):
        """Report a closing tag."""
        super().handle_endtag(tag)
        print("end tag is </{}>".format(tag))

    def handle_data(self, data):
        """Report text data found between tags."""
        super().handle_data(data)
        print('data is {}'.format(data))

    def handle_startendtag(self, tag, attrs):
        """Report a self-closing tag such as ``<br />``.

        The base implementation forwards to ``handle_starttag`` and
        ``handle_endtag``, so those two lines are printed before the
        single-tag line (see the sample output at the end of this file).
        """
        super().handle_startendtag(tag, attrs)
        print('单标签: {}'.format(tag))

    def handle_comment(self, data):
        """Report an HTML comment."""
        super().handle_comment(data)
        print("comment is /*{}*/".format(data))

    def close(self):
        """Flush any buffered input through the base class, then announce it."""
        super().close()
        print('Close...')


my_html_parser = MyHtmlParser()

# HTMLParser.feed() performs the actual parsing and triggers the handlers.
my_html_parser.feed(
    "<html><head><title>Test</title></head>"
    "<body><h1>Parse me!<br /></h1></body></html>"
)

# End-of-input handling: forces processing of any data still buffered.
my_html_parser.close()

# Sample output of `python index.py`:
#
#   start tag is <html>
#   start tag is <head>
#   start tag is <title>
#   data is Test
#   end tag is </title>
#   end tag is </head>
#   start tag is <body>
#   start tag is <h1>
#   data is Parse me!
#   start tag is <br>
#   end tag is </br>
#   单标签: br
#   end tag is </h1>
#   end tag is </body>
#   end tag is </html>
#   Close...

# Additional notes:
#   HTMLParser.reset():  resets the instance and discards all unprocessed
#                        data; it is called implicitly at instantiation time.
#   HTMLParser.getpos(): returns the current line number and offset.