zoukankan      html  css  js  c++  java
  • 百度地图商家爬虫

    import requests,json
    from bs4 import BeautifulSoup
    import pandas
    
    # One search URL per result page; the `pn` query parameter carries the
    # 1-based page number (1 through 2103 inclusive).
    aa = [
        '''http://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=con&from=webmap&c=131&wd=%E5%81%A5%E8%BA%AB&wd2=&pn={}&nn=70&db=0&sug=0&addr=0&pl_data_type=life&pl_sub_type=%E5%81%A5%E8%BA%AB-&pl_price_section=0%2C%2B&pl_sort_type=data_type&pl_sort_rule=0&pl_discount2_section=0%2C%2B&pl_groupon_section=0%2C%2B&pl_cater_book_pc_section=0%2C%2B&pl_hotel_book_pc_section=0%2C%2B&pl_ticket_book_flag_section=0%2C%2B&pl_movie_book_section=0%2C%2B&pl_business_type=life&pl_business_id=&da_src=pcmappg.poi.page&on_gel=1&src=7&gr=3&l=10&tn=B_NORMAL_MAP&u_loc=12947364,4845057&ie=utf-8&b=(12944120,4766193;12995064,4922865)&t=1501815552268'''.format(page_number)
        for page_number in range(1, 2104)
    ]


    # Accumulates one dict per merchant across all parsed pages.
    all_data = []
    
    def url_parser(url, timeout=10):
        """Fetch one result page and append its merchant records to ``all_data``.

        The response body is decoded directly as JSON. Routing it through an
        HTML parser and trimming the wrapper tags with ``lstrip``/``rstrip``
        is fragile: those methods strip *sets of characters*, not literal
        prefixes/suffixes, and the HTML round-trip can rewrite ``&`` or ``<``
        that appear inside JSON string values.

        Args:
            url: Fully formatted search URL for a single result page.
            timeout: Seconds to wait on the server before giving up, so that a
                single stalled connection cannot hang the whole crawl.

        Raises:
            requests.RequestException: On network failure, timeout, or an HTTP
                error status.
            ValueError: If the response body is not valid JSON.
            KeyError: If a record lacks ``name`` or ``addr``.
        """
        res = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to decode an error page.
        res.raise_for_status()
        jdata = json.loads(res.text)

        # A payload without 'content' contributes no records rather than
        # raising KeyError (presumably an empty/out-of-range page -- verify).
        for item in jdata.get('content', []):
            all_data.append({
                '名称': item['name'],
                '地址': item['addr'],
                # Records without a phone number get 0 as the placeholder.
                '电话': item.get('tel', 0),
            })
    
    # Crawl every result page in order, printing the running record count
    # after each page as a progress indicator.
    for page_url in aa:
        url_parser(page_url)
        print(len(all_data))

    # Write the collected records to an Excel workbook, omitting the index column.
    pandas.DataFrame(all_data).to_excel('jianshen.xlsx', index=False)
  • 相关阅读:
    vim 的配置文件
    linux bash 的自动补全
    linux ping 命令
    linux 安装ifconfig
    dos exist 命令
    linux 用户的添加,组的添加,以及查看
    if else 的.bat 文件
    For 的.bat文件
    rmdir 的.bat文件
    dos set 命令
  • 原文地址:https://www.cnblogs.com/Erick-L/p/7284425.html
Copyright © 2011-2022 走看看