import requests

# Download the page and dump the raw response body exactly as received.
url = 'http://www.16xx8.com/'
html = requests.get(url)

# .content is the undecoded byte string, so no charset handling happens here.
print(html.content)
#html.content和html.text
content是bytes类型的原始响应数据(也可以是图片等二进制数据);text是按html.encoding解码后得到的字符串,即网页代码
content在Python 2.7中可以顺利打印出网页代码;但是在Python 3.6中打印出的中文是乱码,而且很卡,代码一直处于运行状态
text在python 2.7版本中打印出网页代码中文乱码;在Python3.6上面打印的中文也是乱码
Python 3下始终是乱码,解决办法如下:
方法一
import requests

html = requests.get('http://www.16xx8.com/')

# Tell requests which charset to use; .text is decoded with this value
# instead of whatever was guessed from the response headers.
html.encoding = 'gb2312'

print(html.text)
方法二
import requests

url = 'http://www.16xx8.com/'
# A timeout keeps the script from waiting forever on an unresponsive server.
html = requests.get(url, timeout=10)

if html.encoding == 'ISO-8859-1':
    # ISO-8859-1 is what requests assumes for text responses whose
    # Content-Type header carries no charset, so it is not trustworthy here.
    # Prefer a charset declared inside the page itself; if there is none,
    # fall back to the encoding detected from the raw bytes.
    encodings = requests.utils.get_encodings_from_content(html.text)
    encoding = encodings[0] if encodings else html.apparent_encoding
else:
    # The server declared a charset explicitly; use it. html.encoding can be
    # None when no usable Content-Type header is present, so detect instead.
    encoding = html.encoding or html.apparent_encoding

# Detection may yield None as well; UTF-8 is the last-resort guess.
# 'replace' swaps undecodable bytes for U+FFFD instead of raising.
encode_content = html.content.decode(encoding or 'utf-8', 'replace')
print(encode_content)