# Tool: python3
# Goal: take a search keyword from the user and fetch the results page for that keyword
import urllib.parse
import urllib.request
# Request headers: a Chrome-on-Windows User-Agent string sent with every request.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"}
# Search endpoint; the keyword is appended as the ``wd`` query parameter.
url = "http://www.baidu.com/s"


def build_search_url(keyword: str) -> str:
    """Return the full search URL for *keyword*.

    The keyword is URL-encoded as the ``wd`` query parameter, so non-ASCII
    text and reserved characters (spaces, ``&``, ``=``) are passed safely.
    """
    # urlencode() expects a mapping (or a sequence of pairs), not a bare string.
    return url + "?" + urllib.parse.urlencode({"wd": keyword})


def fetch_page(fullurl: str) -> bytes:
    """Send a GET request to *fullurl* and return the raw response body.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
    """
    request = urllib.request.Request(fullurl, headers=headers)
    print(request)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(request) as response:
        return response.read()


def save_page(content: bytes, path: str = "baidu.txt") -> None:
    """Write the raw response bytes in *content* to the file at *path*.

    The file is opened in binary mode so the page is stored exactly as
    received; writing ``str(content)`` would store the ``b'...'`` repr
    with escaped bytes instead of the page itself.
    """
    with open(path, "wb") as f:
        f.write(content)


def main() -> None:
    """Prompt for a keyword, fetch its search results page, and save it."""
    kd = input("请输入要查询的关键字:")
    fullurl = build_search_url(kd)
    print(fullurl)
    save_page(fetch_page(fullurl))


if __name__ == "__main__":
    main()