一、使用 GET 和 POST 请求获取数据
# Fetch the raw HTML source of a web page.
import urllib.request

# urlopen() returns a response object that holds an open connection; using it
# as a context manager closes the connection as soon as the body is consumed.
with urllib.request.urlopen("https://www.baidu.com/") as response:
    # read() yields bytes, so decode them to text before printing.
    print(response.read().decode('utf-8'))
# GET request: calling urlopen() without a `data` argument issues a GET.
import urllib.request
import urllib.parse

# The context manager closes the underlying connection when the block exits.
with urllib.request.urlopen("http://httpbin.org/get") as response:
    print(response.read().decode("utf-8"))
# POST request: passing `data` to urlopen() switches the method to POST.
import urllib.request
import urllib.parse

# urlencode() builds the "key=value" form body as str; urlopen() requires
# bytes, so encode it explicitly.
data = urllib.parse.urlencode({"hello": "world"}).encode("utf-8")

# The context manager closes the underlying connection when the block exits.
with urllib.request.urlopen("http://httpbin.org/post", data=data) as response:
    print(response.read().decode("utf-8"))
二、超时处理
import socket
import urllib.error
import urllib.request
import urllib.parse

# Request with a deliberately tiny timeout (in seconds) to demonstrate how a
# timed-out request is handled.
try:
    with urllib.request.urlopen("http://httpbin.org/get", timeout=0.01) as response:
        print(response.read().decode("utf-8"))
except (urllib.error.URLError, socket.timeout):
    # A timeout while connecting/sending is wrapped in URLError, but one that
    # fires while waiting for or reading the response surfaces as a bare
    # socket.timeout, so both must be caught.
    # NOTE: URLError also covers other connection failures (e.g. DNS errors);
    # they are reported with the same message here.
    print("timeout!")
三、获取状态码
import urllib.request
import urllib.parse

# Print the HTTP status code of the response (e.g. 200 on success).
# The context manager closes the connection once the status has been read.
with urllib.request.urlopen("http://httpbin.org/get") as response:
    print(response.status)
四、浏览器伪装
1. 在浏览器的开发者工具中找到请求头里的 User-Agent 信息
2. 将该信息封装到请求头(headers)中,随请求一起发送
import urllib.request
import urllib.parse

# Send the request with a browser-like User-Agent header instead of urllib's
# default one, so the request looks like it comes from a regular browser.
url = "https://www.douban.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"
}

# A Request object is needed to attach custom headers; urlopen() accepts it in
# place of a plain URL string.
req = urllib.request.Request(url=url, headers=headers)

# The context manager closes the underlying connection when the block exits.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))