urllib.request 基本用法
(1)基本用法
import urllib.request

url = "http://www.baidu.com/"
# Open the URL; the context manager closes the connection once the body
# has been read, instead of leaving the socket open.
with urllib.request.urlopen(url) as response:
    # read() returns the raw response body as bytes.
    data = response.read()
# Decode the bytes into a str.
str_data = data.decode("utf-8")
# The reverse direction: encode a str into bytes.
str_name = "baidu"
bytes_name = str_name.encode("utf-8")
(2)url参数中含有汉字时需要进行URL编码(转义)
import urllib.parse
import urllib.request

url = "http://www.baidu.com/s?wd="
name = "python中含有汉字"
# A URL must be ASCII, so the Chinese query value has to be percent-encoded.
# Only the value is quoted: quoting the whole URL with safe=string.printable
# leaves spaces, "&" and "#" unescaped (they are all "printable") and needs
# the `string` module, which this snippet never imported.
encode_new_url = url + urllib.parse.quote(name)
# Send the request; the context manager closes the connection afterwards.
with urllib.request.urlopen(encode_new_url) as response:
    print(response)
    # Read the body; decode() defaults to UTF-8.
    data = response.read().decode()
# Save the page to a local file.
with open("02-encode.html", "w", encoding="utf-8") as f:
    f.write(data)
(3)传入字典类型的参数
import urllib.parse
import urllib.request

url = "http://www.baidu.com/s?"
params = {
    "wd": "中文",
    "key": "zhang",
    "value": "san",
}
# urlencode() joins the pairs as key=value&... and percent-encodes
# non-ASCII text as UTF-8.
str_params = urllib.parse.urlencode(params)
# The query string is already pure ASCII at this point, so no additional
# quote() pass (and no `string` import) is needed.
final_url = url + str_params
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(final_url) as response:
    data = response.read().decode("utf-8")
print(data)
(4)添加header
第一种添加header的方式
url = "https://www.baidu.com"
# Browser identification string sent as the User-Agent header.
browser_ua = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/73.0.3683.86 Safari/537.36'
)
# Header fields to attach to the request, including a custom "name" field.
headers = dict([
    ('User-Agent', browser_ua),
    ('name', 'chen'),
])
# Build the request object with the headers supplied up front.
request = urllib.request.Request(url=url, headers=headers)
第二种添加header的方式:动态添加
import urllib.request

url = "https://www.baidu.com"
# Create the request object first, then attach headers to it afterwards.
request = urllib.request.Request(url)
request.add_header(
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
)
# Send the request; the context manager closes the connection once the
# body has been read.
with urllib.request.urlopen(request) as response:
    data = response.read().decode("utf-8")
# Full URL the request was built for.
final_url = request.get_full_url()
print(final_url)
with open("baidu.html", "w", encoding="utf-8") as f:
    f.write(data)
# Request headers. Request stores each header name via str.capitalize(),
# which is why the lookup key below is "User-agent" rather than "User-Agent".
request_headers = request.headers
print(request_headers)
user_agent = request.get_header('User-agent')
print(user_agent)
# Response headers (parsed up front, so still readable after closing).
print(response.headers)
(5)使用代理
import urllib.request

url = 'https://www.cnblogs.com/chenshy'
# ProxyHandler selects the proxy by URL scheme. The target URL is https,
# so an 'http'-only mapping would silently bypass the proxy and connect
# directly; register the proxy for both schemes.
# NOTE(review): assumes this proxy accepts HTTPS (CONNECT) traffic - confirm.
proxy_address = '119.102.25.91:9999'
proxy = {
    'http': proxy_address,
    'https': proxy_address,
}
# Handler that routes requests through the proxy.
proxy_handler = urllib.request.ProxyHandler(proxy)
# Build a custom opener that uses the proxy handler.
opener = urllib.request.build_opener(proxy_handler)
# Send the request through the proxy; close the connection when done.
with opener.open(url) as response:
    data = response.read().decode("utf-8")
print(data)
(6) cookie
a.在头部添加cookie
import urllib.request

url = 'https://www.yaozh.com/member/'
# The header name must be 'User-Agent' (hyphen). With 'User_Agent' the value
# is sent as an unrelated header and urllib's default agent is still used.
# NOTE(review): the Cookie value is a captured browser session; it expires
# and should not be kept in source control.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    'Cookie': 'acw_tc=707c9fd115550786016938465e492bb70702a65dacd78c0969a841171ddf8d; PHPSESSID=t4rb1af4vmks8gete5oqfd6ub7; _ga=GA1.2.521923122.1555078606; _gid=GA1.2.523976398.1555078606; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1555078606; MEIQIA_VISIT_ID=1JlnZOkmbJhJwfRjhyv0gTMf14i; MEIQIA_EXTRA_TRACK_ID=1JlnZL47AayFGs373mZAapsuPKv; yaozh_logintime=1555078687; yaozh_user=729821%09lifelover; yaozh_userId=729821; _gat=1; Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94=1555078691; yaozh_uidhas=1; yaozh_mylogin=1555078693; acw_tc=707c9fd115550786016938465e492bb70702a65dacd78c0969a841171ddf8d; MEIQIA_VISIT_ID=1JlnZOkmbJhJwfRjhyv0gTMf14i; MEIQIA_EXTRA_TRACK_ID=1JlnZL47AayFGs373mZAapsuPKv',
}
request = urllib.request.Request(url, headers=headers)
# Send the request with the cookie attached; close the connection when done.
with urllib.request.urlopen(request) as response:
    data = response.read().decode('utf-8')
print(data)
b.登录之后获取cookie,cookiejar的使用
import urllib.request
from http import cookiejar
from urllib import parse
def login():
    """Log in to yaozh.com and save the member-centre page to ``test.html``.

    POSTs the login form through an opener that carries a ``CookieJar``,
    then requests the member page with the same opener so that any cookies
    set by the login response are sent along. The decoded page is written
    to ``test.html`` in the current directory.

    Returns:
        None.

    Raises:
        urllib.error.URLError: if either request fails (``HTTPError`` for
            HTTP error statuses).
    """
    # Step 1: log in from code so the cookie jar captures the session cookies.
    url = 'https://www.yaozh.com/login'
    # Login form fields.
    # NOTE(review): credentials are hard-coded; move them to configuration or
    # environment variables before sharing this file.
    # NOTE(review): 'formhash' looks like a per-page token - confirm that a
    # fixed value is accepted by the server.
    login_form_data = {
        'username': 'lifelover',
        'pwd': 'chen19960319',
        'formhash': 'F456373F7B',
        # Plain URL here: urlencode() below performs the percent-encoding.
        # Supplying an already-encoded value would be encoded twice
        # (%3A -> %253A).
        'backurl': 'https://www.yaozh.com/',
    }
    # Opener whose cookie processor stores and replays cookies automatically.
    cook_jar = cookiejar.CookieJar()
    cookie_handler = urllib.request.HTTPCookieProcessor(cook_jar)
    opener = urllib.request.build_opener(cookie_handler)
    # The header name must be 'User-Agent' (hyphen); 'User_Agent' would be
    # sent as an unrelated header.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
    # POST data must be URL-encoded and then converted to bytes.
    login_str = urllib.parse.urlencode(login_form_data).encode('utf-8')
    request = urllib.request.Request(url, headers=headers, data=login_str)
    # Cookies from the login response are stored in the jar by the handler;
    # the body itself is not needed, so just close the response.
    with opener.open(request):
        pass
    # Step 2: request the member centre with the cookies collected above.
    center = 'https://www.yaozh.com/member/'
    center_request = urllib.request.Request(center, headers=headers)
    with opener.open(center_request) as response:
        data = response.read().decode('utf-8')
    with open('test.html', 'w', encoding='utf-8') as f:
        f.write(data)