1. 需求:爬取豆瓣电影分类排行榜(https://movie.douban.com/)中的电影详情数据
(此处以抓取科幻电影分类的信息为例)
import json
import os

import requests

# Fetch one page of results from Douban's movie search endpoint for the
# sci-fi genre and save the decoded JSON payload to files/douban.json.

url = 'https://movie.douban.com/j/new_search_subjects'

# Query-string parameters sent with the GET request.
# NOTE(review): 'start' looks like a result offset and 'range' like a rating
# window -- confirm against the site's own requests before changing them.
params = {
    'sort': 'U',
    'range': '0,10',
    'tags': '',
    'start': '40',
    'genres': '科幻',
}

# Send a browser-like User-Agent instead of the default python-requests one.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'
}

# requests has no default timeout; without one a stalled connection would
# block the script indefinitely.
response = requests.get(url=url, params=params, headers=headers, timeout=10)
# Stop on 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
ret = response.json()

fileName = 'files/' + 'douban.json'
# open(..., 'w') does not create missing parent directories.
os.makedirs(os.path.dirname(fileName), exist_ok=True)
with open(fileName, 'w', encoding='utf-8') as f:
    # ensure_ascii=False writes non-ASCII text as-is rather than as \uXXXX.
    f.write(json.dumps(ret, ensure_ascii=False))
print('work is done')
2. 需求:爬取肯德基餐厅查询页面(http://www.kfc.com.cn/kfccda/index.aspx)中指定地点的餐厅数据
import json
import os

import requests

if __name__ == '__main__':
    # Ask for a location keyword, POST it to the KFC store-list endpoint,
    # print the raw response and save the decoded payload to files/KFC.json.
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    keyword = input('请输入搜索地点: ')

    # Form fields sent in the POST body; only the first page of 10 results
    # is requested.
    data = {
        'cname': '',
        'pid': '',
        'keyword': keyword,
        'pageIndex': 1,
        'pageSize': 10,
    }

    # Send a browser-like User-Agent instead of the default python-requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'
    }

    # requests has no default timeout; without one a stalled connection would
    # block the script indefinitely.
    response = requests.post(url=url, data=data, headers=headers, timeout=10)
    # Stop on 4xx/5xx instead of saving an error page.
    response.raise_for_status()

    page_info = response.text
    print(page_info)

    fileName = 'files/KFC.json'
    # open(..., 'w') does not create missing parent directories.
    os.makedirs(os.path.dirname(fileName), exist_ok=True)
    with open(fileName, 'w', encoding='utf-8') as f:
        # response.text is already a JSON document; passing that string to
        # json.dumps would encode it a second time and store one quoted,
        # escaped string. Decode the payload first so the file holds the
        # actual JSON structure.
        f.write(json.dumps(response.json(), ensure_ascii=False))
    print('work is done')