zoukankan      html  css  js  c++  java
  • 拼接省份

    import re
    import json
    from odps import ODPS
    from threading import Thread
    import threading
    from urllib import parse
    import datetime
    from lxml import etree

    import random 
    import requests
    import time

    from models import *

    # Base URL of the Qunar hotel SEO navigation endpoint; save_city_list()
    # appends a city's `url` value (from cityList.json) as the `city` parameter.
    district_url = "https://hotel.qunar.com/napi/seo?path=%2Fseo%2Fnav&city="

    def read_city():
        """Build ``[province_name, city_name]`` pairs from the ``regions`` table.

        A row counts as a province when the text of its ``area_code`` contains
        ``'0000'``.  A row belongs to a province when its ``pre_code`` equals
        that province's ``area_code``.  A child row named "市辖区" is recorded
        under the province's own name instead (presumably the
        direct-administered municipalities, where province and city coincide
        -- confirm against the data).

        Returns:
            A list of two-item lists ``[province_name, city_name]``.  Provinces
            appear in query order, and the cities of each province appear in
            query order as well.
        """
        city_list = regions.select()

        # Single pass over the rows: collect the provinces and, at the same
        # time, index every row name by its parent code.  This replaces a full
        # re-scan of the table for each province.
        province = []
        names_by_parent = {}
        for data in city_list:
            if '0000' in str(data.area_code):
                province.append([data.area_name, data.area_code])
            names_by_parent.setdefault(data.pre_code, []).append(data.area_name)

        province_city = []
        for item in province:
            province_name = item[0]
            for area_name in names_by_parent.get(item[1], ()):
                if area_name == "市辖区":
                    # Use the province name as the city name for this row.
                    province_city.append([province_name, province_name])
                else:
                    province_city.append([province_name, area_name])

        return province_city

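    # Legacy version of save_city_list(), kept for reference only: it fetches and
    # stores city data from the cityList.json text, writes to qunar_List_City and
    # does not fill in a province.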
    # def save_city_list():
    #     with open('cityList.json','r',encoding='utf8')as fp:
    #         json_data = json.load(fp)
    #         for data in json_data:
    #             for data_0 in data:
    #                 for data_value in data_0['value']:
    #                     district_url_0 = district_url + str(data_value['url'])
    #                     response = requests.request("GET", district_url_0)
    #                     json_city = json.loads(response.text)
    #                     #if len(json_city) > 0 and len(json_city['data'] > 0):
    #                     try:
    #                         if  (json_city['data'][0]["name"] == data_value['name'] + "行政区酒店") and (json_city['data'][0]['type'] == "city"):
    #                             for item in json_city['data'][0]['list']:
    #                                 data_i = item["name"].split("酒店")[0]
    #                                 data_i = data_i.split(" ")[0]
    #                                 catalogue = qunar_List_City()
    #                                 catalogue.district_name = data_i # 行政区域名字
    #                                 catalogue.district_spell = item['id'] # 行政区域拼音
    #                                 catalogue.city_name = data_value['name'] # 城市名称
    #                                 catalogue.city_spell = data_value['url'] # 城市拼音
    #                                 catalogue.create_time = datetime.datetime.now() # 抓取时间
    #                                 existed_id = qunar_List_City.select().where(qunar_List_City.district_spell==item['id'])
    #                                 if existed_id:
    #                                     pass  
    #                                 else:
    #                                     catalogue.save(force_insert=True)
    #                         else:
    #                             catalogue = qunar_List_City()
    #                             catalogue.district_name = data_value['name'] # 行政区域名字
    #                             catalogue.district_spell = data_value['url'] # 行政区域拼音
    #                             catalogue.city_name = data_value['name'] # 城市名称
    #                             catalogue.city_spell = data_value['url'] # 城市拼音
    #                             catalogue.create_time = datetime.datetime.now() # 抓取时间
    #                             existed_id = qunar_List_City.select().where(qunar_List_City.city_name==data_value['name'])
    #                             if existed_id:
    #                                 pass  
    #                             else:
    #                                 catalogue.save(force_insert=True)
    #                     except:
    #                         #print(response.status_code)
    #                         print("非大陆数据或者城市酒店数据为空")
    #                         print(district_url_0)  

    # 获取城市数据,存储,利用获取到的json文本数据
    def _lookup_province(city, pro_city):
        """Return the province for *city*, or ``" "`` when none is found.

        An entry of *pro_city* matches when *city* is a substring of its second
        element.  If several entries match, the last one wins.
        """
        province = " "
        for entry in pro_city:
            if city in entry[1]:
                province = entry[0]
        return province


    def _store_districts(json_city, data_value, province):
        """Insert one ``List_City`` row per district found in *json_city*.

        Rows are written only when the first element of ``json_city['data']``
        is named ``<city name> + "行政区酒店"`` and has type ``"city"``.
        Cities without such a district list are skipped.  A district whose
        ``id`` is already stored as ``district_spell`` is not inserted again.

        Raises:
            KeyError, IndexError, TypeError: if the payload does not have the
                expected structure.
        """
        first_group = json_city['data'][0]
        if first_group["name"] != data_value['name'] + "行政区酒店":
            return
        if first_group['type'] != "city":
            return

        for item in first_group['list']:
            already_stored = List_City.select().where(
                List_City.district_spell == item['id'])
            if already_stored:
                continue

            # Keep the text before "酒店", then drop anything after a space.
            district_name = item["name"].split("酒店")[0].split(" ")[0]

            catalogue = List_City()
            catalogue.province_name = province
            catalogue.district_name = district_name      # district name
            catalogue.district_spell = item['id']        # district id / spelling
            catalogue.city_name = data_value['name']     # city name
            catalogue.city_spell = data_value['url']     # city spelling
            catalogue.create_time = datetime.datetime.now()  # crawl time
            catalogue.save(force_insert=True)


    def save_city_list(pro_city, timeout=10):
        """Fetch the districts of every city in ``cityList.json`` and store them.

        For each city entry the function looks up its province in *pro_city*,
        requests ``district_url + <city url>`` and stores the districts it
        finds as ``List_City`` rows.  The crawl is best-effort: a failure for
        one city is printed together with its URL and the loop moves on.

        Args:
            pro_city: sequence of ``[province_name, city_name]`` pairs, as
                returned by ``read_city()``.
            timeout: seconds to wait for each HTTP request.  Without a timeout
                a stalled connection would block the crawl indefinitely.
        """
        # Read the file up front so the handle is not held open during the
        # (potentially long) network crawl.
        with open('cityList.json', 'r', encoding='utf8') as fp:
            json_data = json.load(fp)

        for data in json_data:
            for data_0 in data:
                for data_value in data_0['value']:
                    province = _lookup_province(data_value['name'], pro_city)
                    district_url_0 = district_url + str(data_value['url'])

                    try:
                        response = requests.request(
                            "GET", district_url_0, timeout=timeout)
                        json_city = json.loads(response.text)
                    except (requests.RequestException, ValueError) as exc:
                        # Network failure or a body that is not JSON: skip
                        # this city instead of aborting the whole run.
                        print("request failed:", repr(exc))
                        print(district_url_0)
                        continue

                    try:
                        _store_districts(json_city, data_value, province)
                    except (KeyError, IndexError, TypeError):
                        # Payload lacks the expected district structure.
                        print("非大陆数据或者城市酒店数据为空")
                        print(district_url_0)
                    except Exception as exc:
                        # Stay best-effort, but do not report unrelated
                        # failures (e.g. storage errors) as "empty data".
                        print("unexpected error:", repr(exc))
                        print(district_url_0)

    if __name__ == "__main__":
        # Table creation is left disabled here: create_tables()
        # Step 1: collect the [province, city] pairs from the regions table.
        province_city_pairs = read_city()
        # Step 2: crawl the district list of every city and store it.
        save_city_list(province_city_pairs)
        
  • 相关阅读:
    [ZJOI2008]树的统计 树链剖分
    CF915E 动态开线段树
    Poj 2114 Boatherds(点分治)
    Poj 2599 Godfather(树的重心)
    Bzoj 2152: 聪聪可可(点分治)
    Cogs 1714. [POJ1741][男人八题]树上的点对(点分治)
    Cogs 329. K- 联赛(最大流)
    Cogs 731. [网络流24题] 最长递增子序列(最大流)
    Bzoj 2282: [Sdoi2011]消防(二分答案)
    Cogs 732. [网络流24题] 试题库(二分图)
  • 原文地址:https://www.cnblogs.com/dog-and-cat/p/13615479.html
Copyright © 2011-2022 走看看