zoukankan      html  css  js  c++  java
  • 拼接省份

    import re
    import json
    from odps import ODPS
    from threading import Thread
    import threading
    from urllib import parse
    import datetime
    from lxml import etree

    import random 
    import requests
    import time

    from models import *

    # Base URL of the Qunar hotel "seo/nav" endpoint. The query string ends with
    # an empty ``city=`` parameter; save_city_list appends each city's 'url'
    # value from cityList.json to it to build the per-city request URL.
    district_url = "https://hotel.qunar.com/napi/seo?path=%2Fseo%2Fnav&city="

    def read_city():
        """Build ``[province_name, city_name]`` pairs from the ``regions`` table.

        A row counts as a province when the text of its ``area_code`` contains
        ``'0000'``. Every row whose ``pre_code`` equals a province's
        ``area_code`` is reported as a city of that province. A child row named
        "市辖区" is reported under the province's own name instead (presumably
        the placeholder row below a direct-administered municipality -- confirm
        against the data).

        Returns:
            list[list[str]]: ``[province_name, city_name]`` pairs, grouped by
            province in the order the provinces come back from the query, with
            cities inside each group in query order as well.
        """
        provinces = []
        children_by_parent = {}

        # One pass over the rows: remember the provinces and index every row's
        # name by its parent code, so the table is not rescanned per province.
        for row in regions.select():
            # NOTE(review): this is a substring test, so any code that merely
            # contains '0000' is treated as a province; kept as in the original
            # because the code format is not visible here.
            if '0000' in str(row.area_code):
                provinces.append((row.area_name, row.area_code))
            children_by_parent.setdefault(row.pre_code, []).append(row.area_name)

        province_city = []
        for province_name, province_code in provinces:
            for child_name in children_by_parent.get(province_code, ()):
                if child_name == "市辖区":
                    province_city.append([province_name, province_name])
                else:
                    province_city.append([province_name, child_name])

        return province_city

    # # 获取城市数据,存储,利用获取到的json文本数据
    # def save_city_list():
    #     with open('cityList.json','r',encoding='utf8')as fp:
    #         json_data = json.load(fp)
    #         for data in json_data:
    #             for data_0 in data:
    #                 for data_value in data_0['value']:
    #                     district_url_0 = district_url + str(data_value['url'])
    #                     response = requests.request("GET", district_url_0)
    #                     json_city = json.loads(response.text)
    #                     #if len(json_city) > 0 and len(json_city['data'] > 0):
    #                     try:
    #                         if  (json_city['data'][0]["name"] == data_value['name'] + "行政区酒店") and (json_city['data'][0]['type'] == "city"):
    #                             for item in json_city['data'][0]['list']:
    #                                 data_i = item["name"].split("酒店")[0]
    #                                 data_i = data_i.split(" ")[0]
    #                                 catalogue = qunar_List_City()
    #                                 catalogue.district_name = data_i # 行政区域名字
    #                                 catalogue.district_spell = item['id'] # 行政区域拼音
    #                                 catalogue.city_name = data_value['name'] # 城市名称
    #                                 catalogue.city_spell = data_value['url'] # 城市拼音
    #                                 catalogue.create_time = datetime.datetime.now() # 抓取时间
    #                                 existed_id = qunar_List_City.select().where(qunar_List_City.district_spell==item['id'])
    #                                 if existed_id:
    #                                     pass  
    #                                 else:
    #                                     catalogue.save(force_insert=True)
    #                         else:
    #                             catalogue = qunar_List_City()
    #                             catalogue.district_name = data_value['name'] # 行政区域名字
    #                             catalogue.district_spell = data_value['url'] # 行政区域拼音
    #                             catalogue.city_name = data_value['name'] # 城市名称
    #                             catalogue.city_spell = data_value['url'] # 城市拼音
    #                             catalogue.create_time = datetime.datetime.now() # 抓取时间
    #                             existed_id = qunar_List_City.select().where(qunar_List_City.city_name==data_value['name'])
    #                             if existed_id:
    #                                 pass  
    #                             else:
    #                                 catalogue.save(force_insert=True)
    #                     except:
    #                         #print(response.status_code)
    #                         print("非大陆数据或者城市酒店数据为空")
    #                         print(district_url_0)  

    # 获取城市数据,存储,利用获取到的json文本数据
    def _match_province(city_name, pro_city):
        """Return the province whose city entry contains *city_name*.

        Matching is a substring test of *city_name* against the second element
        of each ``pro_city`` entry. When several entries match, the last one
        wins; when none match, a single space is returned.
        """
        province = " "
        for entry in pro_city:
            if city_name in entry[1]:
                province = entry[0]
        return province


    def _save_districts(json_city, data_value, province):
        """Store the districts listed in one city's API payload.

        Only acts when the first element of ``json_city['data']`` is named
        ``<city name>行政区酒店`` and has type ``"city"``; otherwise nothing is
        saved. Raises KeyError / IndexError / TypeError / AttributeError when
        the payload does not have the expected shape.
        """
        head = json_city['data'][0]
        if head["name"] != data_value['name'] + "行政区酒店" or head['type'] != "city":
            return

        for item in head['list']:
            # The district name is the text before "酒店", cut at the first space.
            district_name = item["name"].split("酒店")[0].split(" ")[0]

            # Skip districts that are already stored under the same spelling.
            existing = List_City.select().where(List_City.district_spell == item['id'])
            if existing:
                continue

            catalogue = List_City()
            catalogue.province_name = province
            catalogue.district_name = district_name  # district name
            catalogue.district_spell = item['id']  # district identifier from the API
            catalogue.city_name = data_value['name']  # city name
            catalogue.city_spell = data_value['url']  # city identifier used in the URL
            catalogue.create_time = datetime.datetime.now()  # time of the crawl
            catalogue.save(force_insert=True)


    def save_city_list(pro_city, *, city_file='cityList.json', timeout=10):
        """Fetch the district list of every city in *city_file* and store it.

        For each city entry the province is looked up in *pro_city*, the
        district data is requested from ``district_url`` plus the city's
        ``url`` value, and each district not yet present is saved as a
        ``List_City`` row.

        Args:
            pro_city: sequence of ``[province_name, city_name]`` pairs, as
                returned by ``read_city``.
            city_file: path of the JSON file with the city list. It is read as
                a list of lists of objects, each holding a ``'value'`` list of
                entries with ``'name'`` and ``'url'`` keys.
            timeout: seconds to wait for each HTTP request before giving up.

        A city whose request fails, whose response is not valid JSON, or whose
        payload lacks the expected structure is reported on stdout and skipped.
        Other errors (for example database failures) propagate to the caller.
        """
        # Read the whole file up front so it is not held open while the
        # (slow) network requests run.
        with open(city_file, 'r', encoding='utf8') as fp:
            json_data = json.load(fp)

        for group in json_data:
            for bucket in group:
                for data_value in bucket['value']:
                    province = _match_province(data_value['name'], pro_city)
                    district_url_0 = district_url + str(data_value['url'])

                    try:
                        response = requests.request("GET", district_url_0, timeout=timeout)
                        json_city = json.loads(response.text)
                    except (requests.RequestException, ValueError) as exc:
                        # Network failure, timeout, or a non-JSON body: skip
                        # this city instead of aborting the whole crawl.
                        print("请求失败或响应不是合法的JSON: %s" % exc)
                        print(district_url_0)
                        continue

                    try:
                        _save_districts(json_city, data_value, province)
                    except (KeyError, IndexError, TypeError, AttributeError):
                        # The payload does not have the expected structure.
                        print("非大陆数据或者城市酒店数据为空")
                        print(district_url_0)

    if __name__ == "__main__":
        # Script entry point: build the province/city pairs and hand them
        # straight to the crawler. Table creation (create_tables()) stays
        # disabled, as before.
        save_city_list(read_city())
        
  • 相关阅读:
    Worktile 技术架构概要
    开发,从未如此清晰
    花一分钟来看看Worktile是如何为团队协作而生的
    Worktile协同特色之二:任务看板管理
    Worktile协同特色之一:无处不在的关注
    老板的宽容是一味害人的毒药——慈不带兵,善不经商,这个社会的法则就是弱肉强食
    以大多数人的努力程度之低来看,根本还轮不到拼天赋。时间管理。薪水是最低级的目标。比薪水更重要的,是成长和成就自己的机会。
    小豆君:你的目标是让其它工具为你服务,你要踩在巨人的肩膀上创造世界(摒弃掉你的好奇心,千万不要去追求第三方类或工具是怎么实现的,这往往会让你收效甚微,其实,你只需要熟练掌握它的接口,知道类的目的即可,不可犯面向过程的毛病)
    周末没事干就看CSS JS Python ThinkPHP的书,照着例子运行就行,可以增强信心(还有QML,虚拟机运行Web。Windows核心编程,照着例子运行。没事看看socket和rest的例子和文档,还有POCO和BOOST)
    离开华为三年,我才真正认同狼性法则(目标导向,没有借口,都是为自己的懒惰与不肯死磕找借口)
  • 原文地址:https://www.cnblogs.com/dog-and-cat/p/13615479.html
Copyright © 2011-2022 走看看