zoukankan      html  css  js  c++  java
  • 2011年到2017年11月的 2345_历史天气爬虫

    # -*- coding:utf-8 -*-
    import requests
    import time
    import json
    import re
    import demjson
    import xlwt
    from multiprocessing.pool import ThreadPool #线程池

    class History_weather():
        """Crawler that copies 2345.com monthly weather history into an xlwt workbook.

        ``join_url`` walks every station/year/month combination, ``get_url``
        downloads one monthly feed, and ``parser_url`` appends one spreadsheet
        row per day found in that feed.  The caller is responsible for saving
        the workbook through the ``f`` attribute.
        """

        # Seconds to wait for the server before a request is abandoned.
        REQUEST_TIMEOUT = 30
        # Total download attempts per URL, and the pause (seconds) between them.
        MAX_ATTEMPTS = 3
        RETRY_DELAY = 0.1
        # Highest row index xlwt accepts: the .xls format holds 65536 rows.
        LAST_XLS_ROW = 65535

        def __init__(self):
            """Create the request headers, the workbook and its first sheet."""
            self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}
            self.f = xlwt.Workbook(encoding='utf-8')

            # First worksheet; cell_overwrite_ok lets a cell be written twice.
            self.sheet1 = self.f.add_sheet(u'sheet1', cell_overwrite_ok=True)

            # Sheet currently being filled, and how many sheets exist so far.
            self._sheet = self.sheet1
            self._sheet_count = 1
            # Index of the last row written on the current sheet.  Kept on the
            # instance so the class also works when imported, not only when
            # the file is run as a script that defines a global counter.
            self.num = 0

        def _iter_urls(self):
            """Yield every monthly feed URL, station by station, oldest month first."""
            li = ['57048', '70561', '70569', '60549', '53845', '60387', '70575', '70593', '71030', '60386', '57036', '60540', '60967', '60384', '70598', '70590', '57245', '70566', '70580', '70597', '70574', '60968', '60969', '71031', '60964', '60956', '71270', '57127', '60542', '70602', '71664', '70595', '70599', '57016', '71225', '70582', '70581', '70564', '60328', '70565', '70579', '70603', '60958', '71660', '71665', '60546', '70560', '70586', '71667', '60538', '60963', '70600', '60960', '70594', '70604', '71653', '70576', '60385', '60961', '70577', '71663', '70584', '60541', '60547', '60383', '53646', '71662', '71658', '70572', '70559', '70591', '71656', '70596', '71657', '70558', '60970', '71275', '71273', '60544', '70567', '71269', '70570', '70571', '71651', '70585', '71654', '60545', '60537', '53947', '70589', '60679', '71652', '60543', '71199', '60536', '70573', '57045', '60966', '60548', '71666', '70588', '60965', '70568', '60957', '70592', '70601', '70587', '71655', '60539', '60329', '70557', '70583', '70563', '70562', '60331', '60962', '71661', '71650', '60330', '71659']
            url_start = 'http://tianqi.2345.com/t/wea_history/js/'
            for station in li:
                for year in range(11, 18):
                    for month in range(1, 13):
                        # The crawl stops at November 2017.
                        if year == 17 and month == 12:
                            break
                        # e.g. .../js/57048_20111.js (month is not zero-padded)
                        yield '%s%s_20%d%d.js' % (url_start, station, year, month)

        def join_url(self):
            """Download and store every monthly feed from 2011-01 to 2017-11.

            URLs are processed one after another through ``get_url``.
            """
            for url in self._iter_urls():
                self.get_url(url)

        def get_url(self, url):
            """Fetch one feed and pass its text to ``parser_url``.

            The request is tried up to ``MAX_ATTEMPTS`` times, sleeping
            ``RETRY_DELAY`` seconds after each non-200 response.

            :param url: address of one monthly ``.js`` feed.
            :return: ``0`` when every attempt ended with a non-200 status,
                otherwise ``None`` (also when an error was caught and reported).
            """
            try:
                for attempt in range(self.MAX_ATTEMPTS):
                    if attempt:
                        time.sleep(self.RETRY_DELAY)
                    # A timeout keeps one stalled connection from hanging the crawl.
                    r = requests.get(url, headers=self.headers,
                                     timeout=self.REQUEST_TIMEOUT)
                    if r.status_code == 200:
                        self.parser_url(r.text)
                        return None
                return 0
            except Exception as e:
                # Best effort: one bad feed must not stop the crawl, but the
                # failure is reported instead of being dropped silently.
                print("get_url failed for %s: %r" % (url, e))
                return None

        def _write_row(self, values):
            """Append ``values`` as the next row, opening a new sheet when one is full."""
            if self.num >= self.LAST_XLS_ROW:
                # xlwt rejects row indexes above 65535, so continue on a fresh sheet.
                self._sheet_count += 1
                self._sheet = self.f.add_sheet(u'sheet%d' % self._sheet_count,
                                               cell_overwrite_ok=True)
                self.num = 0
            # Incremented before writing, so row 0 of each sheet stays empty.
            self.num += 1
            for col, value in enumerate(values):
                self._sheet.write(self.num, col, value)  # arguments: row, column, value

        def parser_url(self, r):
            """Extract the daily records from one feed and write them to the sheet.

            :param r: text of a feed containing ``var weather_str=...;``.
            :raises IndexError: when the ``weather_str`` assignment is missing.
            :raises KeyError: when a non-empty record lacks an expected field.
            """
            payload = re.findall(r'var weather_str=(.*?);', r)[0]
            # The payload is a JavaScript object literal, decoded with demjson.
            json_r = demjson.decode(payload)
            for day in json_r['tqInfo']:
                if not day:
                    # NOTE(review): assumes feeds may contain empty placeholder
                    # entries; skipping them keeps the remaining days of the
                    # month.  Confirm against a live feed.
                    continue
                li = [
                    json_r['city'],
                    day['ymd'],        # date
                    day['tianqi'],     # weather
                    day['bWendu'],     # high temperature
                    day['yWendu'],     # low temperature
                    day['fengli'],     # wind force
                    day['fengxiang'],  # wind direction
                ]
                print(li)
                self._write_row(li)




    if __name__ == '__main__':
        # Module-level row counter, kept for code that declares `global num`.
        num = 0
        aa = History_weather()
        # Only one task is submitted, so a single worker thread is enough;
        # the crawl inside join_url runs sequentially.
        pool = ThreadPool(1)
        # Keep the AsyncResult: without it an exception raised in join_url
        # would be discarded silently.
        task = pool.apply_async(aa.join_url)
        pool.close()  # no further tasks will be submitted
        pool.join()   # wait for the submitted task to finish
        try:
            task.get()  # re-raises whatever join_url raised in the worker
        except Exception as exc:
            # Report the failure but still save the rows collected so far.
            print("crawl aborted: %r" % (exc,))
        # NOTE(review): 'e:excel_finally_1.xls' has no backslash after the
        # drive letter, so on Windows it is relative to the current directory
        # of drive E: -- confirm that this is the intended location.
        aa.f.save(r'e:excel_finally_1.xls')  # write the workbook to disk
  • 相关阅读:
    .NETframework的EF框架学习报错之datetime 数据类型
    String...的用法
    存储过程从入门到熟练(c#篇)
    售前如何做好产品演示
    华为演讲培训售前人员重点学习
    report services 报表开发和部署,集成到解决方案中 全解析
    在Asp.net用C#建立动态Excel(外文翻译)
    NET(C#)连接各类数据库集锦
    在SourceForge.net上如何使用TortoiseCVS
    用C#实现在线升级
  • 原文地址:https://www.cnblogs.com/yuanjia8888/p/9039369.html
Copyright © 2011-2022 走看看