  • Crawling Ele.me's regional merchant distribution

    This one is simple; it is mainly practice in saving the scraped data in a structured format and in working around basic anti-crawling measures. A sample request looks like this:

    https://mainsite-restapi.ele.me/v2/pois?extras%5B%5D=count&geohash=wx4g0bmjetr7&keyword=%E6%9C%9D%E9%98%B3&limit=20&type=nearby
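
    The query string above is URL-encoded; decoding it with nothing beyond the standard library (a minimal sketch) shows what the endpoint is actually being asked for: a geohash for the map centre, a search keyword ("朝阳" in this sample), a result limit, and a nearby-type lookup.

    from urllib.parse import parse_qs

    sample = ("extras%5B%5D=count&geohash=wx4g0bmjetr7"
              "&keyword=%E6%9C%9D%E9%98%B3&limit=20&type=nearby")
    print(parse_qs(sample))  # parse_qs URL-decodes both keys and values
    # {'extras[]': ['count'], 'geohash': ['wx4g0bmjetr7'],
    #  'keyword': ['朝阳'], 'limit': ['20'], 'type': ['nearby']}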

    import urllib.request
    import urllib.parse
    import os
    import json
    from openpyxl import Workbook
    from openpyxl import load_workbook

    # Save path for the keyword-search POI results (the backslashes were stripped by the
    # blog rendering; adjust the path to your own machine)
    keywordExcel = r"C:\Users\uy\Desktop\py3爬虫\饿了么\keyword.xlsx"

    keywords = ["江干", "滨江"]  # set of search keywords


    def reqsetting():  # build the request headers first; the URL holds only the base path for now
        # weburl = "https://mainsite-restapi.ele.me/v2/pois?"
        weburl = "https://www.ele.me/restapi/v2/pois?"
        # sample query string: extras%5B%5D=count&geohash=wx4g0bmjetr7&keyword=%E6%9C%9D%E9%98%B3&limit=20&type=nearby

        webheaders = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9,zh-TW;q=0.8",
            "Connection": "keep-alive",
            "Cookie": "ubt_ssid=ptvjtf67i9lr4uovi39wbvo83ty0239q_2019-02-18; _utrace=824a5a0d3496a33d798248e92c3d152f_2019-02-18; cna=PZ7vFIAQHgECAXueJlYerufe; track_id=1550466556|da0ddc135f632adfcaaeb3e72f35543e485d9b3b484492f856|898bc9f8ba51522ed41a4bd2fb7e039f; isg=BAIC-M_e6rep9_ZrR37SKPuYUwikeyfVgYwZokwaGXUon6kZNGPV_Qe-S5vjz36F",
            "Host": "www.ele.me",  # must match the host in weburl; "mainsite-restapi.ele.me" belongs to the commented-out endpoint
            "Origin": "https://www.ele.me",
            "Referer": "https://www.ele.me/home/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36"
        }
        req = urllib.request.Request(url=weburl, headers=webheaders)

        return req
    

    def write2Excel(jsondata, title):  # write the data for each keyword into its own sheet of the Excel file
        fileName = keywordExcel
        if os.path.exists(fileName):
            wb = load_workbook(fileName)
        else:
            wb = Workbook()

        ws = wb.create_sheet(title)
        ws.append(["ID", "城市", "geohash", "名称", "地址", "商家总数", "经度", "纬度", "request_id", "short_address"])
        ws.column_dimensions["A"].width = 30.0
        ws.column_dimensions["B"].width = 10.0
        ws.column_dimensions["C"].width = 18.0
        ws.column_dimensions["D"].width = 20.0
        ws.column_dimensions["E"].width = 50.0
        ws.column_dimensions["F"].width = 10.0
        ws.column_dimensions["G"].width = 10.0
        ws.column_dimensions["H"].width = 10.0
        ws.column_dimensions["I"].width = 25.0
        ws.column_dimensions["J"].width = 40.0

        for row in jsondata:  # each element is one POI record
            ws.append([row["id"], row["city"], row["geohash"], row["name"], row["address"], row["count"],
                       row["longitude"], row["latitude"], row["request_id"], row["short_address"]])
        wb.save(fileName)
    

    if __name__ == '__main__':  # program entry point
        if os.path.exists(keywordExcel):
            os.remove(keywordExcel)
        req = reqsetting()
        newUrl = req.get_full_url()
        for keyword in keywords:  # for each keyword, build a different set of query parameters and append it to the URL
            params = {
                "extras[]": "count",
                "geohash": "wtmknpnr9yy3",
                "keyword": keyword,
                "limit": "20",
                "type": "nearby"
            }
            params = urllib.parse.urlencode(params)  # URL-encode the query parameters
            req.full_url = newUrl + params  # rebuild the full request URL
            print(req.full_url)
            webpage = urllib.request.urlopen(req)  # fetch the data (pass the Request object so the headers are actually sent)
            contentBytes = webpage.read().decode("utf-8")
            jsondata = json.loads(contentBytes)  # parse the response as JSON (a list of POI dicts)
            write2Excel(jsondata, keyword)  # write the data to Excel
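
    As a quick sanity check (a minimal sketch, assuming the script ran and produced keyword.xlsx at the keywordExcel path above), the workbook can be read back with openpyxl to confirm that each keyword ended up on its own sheet:

    from openpyxl import load_workbook

    wb = load_workbook(keywordExcel)
    for name in wb.sheetnames:  # expect one sheet per keyword, plus openpyxl's default empty "Sheet"
        print(name, wb[name].max_row - 1, "data rows")  # minus 1 for the header row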
  • Original post: https://www.cnblogs.com/WhiteCoder/p/10520550.html