# requests实战之搜索引擎爬取搜索内容
import requests
# Fetch a Sogou web-search result page for a keyword typed by the user and
# save the returned HTML to "<keyword>.html" in the current directory.

# Target URL.
url = 'https://www.sogou.com/web'
kw = input('enter a word: ')

# UA spoofing: send a browser-like User-Agent header with the request.
# The product token previously ended in a bare "Firefox/"; the version is
# completed here to match the "rv:79.0" token in the same string.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
}
param = {
    'query': kw
}

# Send the request. requests has no default timeout, so one is set
# explicitly to keep the script from hanging on a stalled connection.
response = requests.get(url=url, params=param, headers=header, timeout=10)
# Stop on 4xx/5xx instead of saving an error page as if it were a result.
response.raise_for_status()

# Read the response body as text.
content = response.text
fileName = kw + '.html'

# Persist the page locally.
with open(fileName, 'w', encoding='utf-8') as fp:
    fp.write(content)
print(fileName, '爬取结束!!!')
# requests实战之破解百度翻译
import json
import requests
# POST a word or sentence typed by the user to the Baidu Translate "sug"
# endpoint, print the parsed JSON reply and save it to "<input>.json".

url = 'https://fanyi.baidu.com/sug'
word = input('请输入想翻译的词语或句子:')
# Form fields sent in the POST body.
data = {
    'kw': word
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}

# requests has no default timeout; set one so a stalled connection cannot
# block the script indefinitely.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
# Fail with a clear HTTP error on 4xx/5xx rather than a JSON decode error.
response.raise_for_status()
dic_obj = response.json()
print(dic_obj)

filename = word + '.json'
with open(filename, 'w', encoding='utf-8') as fp:
    # ensure_ascii=False keeps non-ASCII text readable in the saved file.
    json.dump(dic_obj, fp=fp, ensure_ascii=False)
print('爬取结束!!!')
# requests实战之爬取豆瓣电影榜单
import json
import requests
# Request the Douban movie chart endpoint with a fixed set of query
# parameters, print the parsed JSON reply and save it to "douban.json".

url = 'https://movie.douban.com/j/chart/top_list?'
# Query-string parameters sent with the GET request.
# NOTE(review): 'start' / 'limit' presumably page through the chart and
# 'type' presumably selects a category - confirm against the site.
params = {
    'type': '11',
    'interval_id': '100:90',
    'action': '',
    'start': '0',
    'limit': '20',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}

# requests has no default timeout; set one so a stalled connection cannot
# block the script indefinitely.
response = requests.get(url=url, params=params, headers=headers, timeout=10)
# Fail with a clear HTTP error on 4xx/5xx rather than a JSON decode error.
response.raise_for_status()
dic_obj = response.json()
print(dic_obj)

with open('douban.json', 'w', encoding='utf-8') as fp:
    # ensure_ascii=False keeps non-ASCII text readable in the saved file.
    json.dump(dic_obj, fp=fp, ensure_ascii=False)
print('爬取结束!!!')
# requests实战之爬取肯德基门店地址
import json
import requests
# POST a location keyword typed by the user to the KFC store-list endpoint
# and save the raw response text to "<location>.html".

url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
location = input('请输入你想查询的地点:')
# Form fields sent in the POST body; only the first page of 10 entries is
# requested.
data = {
    'cname': '',
    'pid': '',
    'keyword': location,
    'pageIndex': '1',
    'pageSize': '10',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}

# requests has no default timeout; set one so a stalled connection cannot
# block the script indefinitely.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
# Stop on 4xx/5xx instead of saving an error page as if it were a result.
response.raise_for_status()
content = response.text

# The body is written unmodified; its format is not inspected here.
with open(location + '.html', 'w', encoding='utf-8') as fp:
    fp.write(content)
print('爬取结束!!!')
# requests实战之药监总局相关数据
import json
import requests
# Two-stage crawl of the NMPA licence portal:
#   1. POST to the list endpoint for pages 1-5 and collect each entry's 'ID'.
#   2. POST every collected ID to the detail endpoint and gather the replies.
# All detail records are then written to "information.json".

url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
id_list = []
all_information = []
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}

# Stage 1: walk the list pages and harvest the IDs.
for page in range(1, 6):
    data = {
        'on': 'true',
        'page': str(page),  # sent as a string form field
        'pageSize': '15',
        'productName': '',
        'conditionType': '1',
        'applyname': '',
        'applysn': '',
    }
    # requests has no default timeout; set one so a single stalled request
    # cannot block the whole crawl.
    list_reply = requests.post(url=url, data=data, headers=headers, timeout=10)
    # Fail with a clear HTTP error on 4xx/5xx rather than a JSON decode error.
    list_reply.raise_for_status()
    response = list_reply.json()
    id_list.extend(dic['ID'] for dic in response['list'])
print('爬取商家id结束')

# Stage 2: fetch the detail record for every harvested ID.
url1 = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsById'
for company_id in id_list:  # renamed from `id` to avoid shadowing the builtin
    data = {
        'id': company_id
    }
    detail_reply = requests.post(url=url1, data=data, headers=headers, timeout=10)
    detail_reply.raise_for_status()
    all_information.append(detail_reply.json())

with open('information.json', 'w', encoding='utf-8') as fp:
    # ensure_ascii must be the boolean False to keep non-ASCII text readable;
    # the previous value 'utf-8' is a truthy string and therefore behaved
    # like ensure_ascii=True, escaping every non-ASCII character as \uXXXX.
    json.dump(all_information, fp=fp, ensure_ascii=False)
print('爬取商家具体信息结束!!!')
# 参考路飞学城视频