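# A simple exercise: getting familiar with saving data in a given format and
# with getting around anti-scraping measures.
# Example request:
# https://mainsite-restapi.ele.me/v2/pois?extras%5B%5D=count&geohash=wx4g0bmjetr7&keyword=%E6%9C%9D%E9%98%B3&limit=20&type=nearby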
import json
import os
import urllib.parse
import urllib.request

from openpyxl import Workbook
from openpyxl import load_workbook
# Path of the workbook that receives the keyword search results.
# NOTE(review): the path contains no separators -- the backslashes appear to
# have been lost; confirm the intended location before relying on it.
keywordExcel = "C:UsersuyDesktoppy3爬虫饿了么keyword.xlsx"
# Search keywords; the entry point issues one request per keyword and writes
# each result set to its own sheet.
keywords = ["江干", "滨江"]
def reqsetting():
    """Build the base request for the ele.me POI search endpoint.

    The request carries browser-like headers and only the root URL (ending
    in ``?``); callers append the URL-encoded query string themselves, e.g.
    ``extras%5B%5D=count&geohash=wtmknpnr9yy3&keyword=...&limit=20&type=nearby``.

    Returns:
        urllib.request.Request: request for the root URL with headers set.
    """
    # Previous endpoint: https://mainsite-restapi.ele.me/v2/pois?
    weburl = "https://www.ele.me/restapi/v2/pois?"
    webheaders = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9,zh-TW;q=0.8",
        "Connection": "keep-alive",
        # NOTE(review): captured browser cookie (the values embed the date
        # 2019-02-18); presumably expired -- refresh before use.
        "Cookie": "ubt_ssid=ptvjtf67i9lr4uovi39wbvo83ty0239q_2019-02-18; _utrace=824a5a0d3496a33d798248e92c3d152f_2019-02-18; cna=PZ7vFIAQHgECAXueJlYerufe; track_id=1550466556|da0ddc135f632adfcaaeb3e72f35543e485d9b3b484492f856|898bc9f8ba51522ed41a4bd2fb7e039f; isg=BAIC-M_e6rep9_ZrR37SKPuYUwikeyfVgYwZokwaGXUon6kZNGPV_Qe-S5vjz36F",
        # Derived from the URL so the header cannot drift from the endpoint;
        # it used to be hard-coded to the old mainsite-restapi.ele.me host.
        "Host": urllib.parse.urlsplit(weburl).netloc,
        "Origin": "https://www.ele.me",
        "Referer": "https://www.ele.me/home/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
    }
    return urllib.request.Request(url=weburl, headers=webheaders)
def write2Excel(jsondata, title):
    """Write one keyword's search results to a sheet of the output workbook.

    The workbook at ``keywordExcel`` is loaded when it already exists and a
    new sheet is added to it; otherwise a new workbook is created. The sheet
    gets a header row, fixed column widths and one row per record, and the
    workbook is saved back to ``keywordExcel``.

    Args:
        jsondata: List of result records (dicts) decoded from the API
            response. A field missing from a record is written as an empty
            cell.
        title: Name of the sheet; the caller passes the search keyword.

    Raises:
        TypeError: If ``jsondata`` is not a list or tuple of records.
    """
    if not isinstance(jsondata, (list, tuple)):
        # Fail with a readable message instead of an obscure lookup error
        # when the payload is not a list of records.
        raise TypeError("expected a list of records, got: {!r}".format(jsondata))

    # (column letter, header text, record key, column width)
    columns = [
        ("A", "ID", "id", 30.0),
        ("B", "城市", "city", 10.0),
        ("C", "geohash", "geohash", 18.0),
        ("D", "名称", "name", 20.0),
        ("E", "地址", "address", 50.0),
        ("F", "商家总数", "count", 10.0),
        ("G", "经度", "longitude", 10.0),
        ("H", "纬度", "latitude", 10.0),
        ("I", "request_id", "request_id", 25.0),
        ("J", "short_address", "short_address", 40.0),
    ]

    if os.path.exists(keywordExcel):
        wb = load_workbook(keywordExcel)
        ws = wb.create_sheet(title)
    else:
        wb = Workbook()
        # A new workbook already contains one empty default sheet; reuse it
        # rather than leaving a blank "Sheet" in the output file.
        ws = wb.active
        ws.title = title

    ws.append([header for _, header, _, _ in columns])
    for letter, _, _, width in columns:
        ws.column_dimensions[letter].width = width
    for row in jsondata:
        ws.append([row.get(key) for _, _, key, _ in columns])
    wb.save(keywordExcel)
def main():
    """Query the POI endpoint for every keyword and save the results.

    Any existing output workbook is removed first, then one request is made
    per entry in ``keywords`` and each decoded response is passed to
    ``write2Excel`` under the keyword's name.
    """
    # Start from a clean workbook on every run.
    try:
        os.remove(keywordExcel)
    except FileNotFoundError:
        pass

    req = reqsetting()
    # urllib sends an explicitly set Host header verbatim; drop it so the
    # header is derived from the URL that is actually requested.
    req.remove_header("Host")
    base_url = req.get_full_url()

    for keyword in keywords:
        # Build this keyword's query string and append it to the root URL.
        query = urllib.parse.urlencode({
            "extras[]": "count",
            "geohash": "wtmknpnr9yy3",
            "keyword": keyword,
            "limit": "20",
            "type": "nearby",
        })
        req.full_url = base_url + query
        print(req.full_url)
        # Pass the Request object (not just its URL string) so the prepared
        # headers are sent, and close the response once it has been read.
        with urllib.request.urlopen(req) as response:
            payload = response.read().decode("utf-8")
        write2Excel(json.loads(payload), keyword)


if __name__ == "__main__":  # script entry point
    main()